def test_download_to_object_version(db, bucket):
    """Test download to object version task.

    Mocks ``requests.get``, runs ``DownloadTask`` against an existing
    ObjectVersion and checks the file is attached, then undoes it with
    ``clean()`` and checks the object (but not the FileInstance) is gone.
    """
    with mock.patch('requests.get') as mock_request:
        obj = ObjectVersion.create(bucket=bucket, key='test.pdf')
        bid = bucket.id
        db.session.commit()

        # Mock download request
        file_size = 1024
        # type() builds a throwaway class whose class attributes mimic a
        # requests Response: a 'raw' byte stream plus a Content-Length header.
        mock_request.return_value = type(
            'Response', (object, ), {
                'raw': BytesIO(b'\x00' * file_size),
                'headers': {'Content-Length': file_size}
            })
        assert obj.file is None

        task_s = DownloadTask().s('http://example.com/test.pdf',
                                  version_id=obj.version_id)
        # Download
        task = task_s.delay()
        # The task attaches the downloaded file to the same ObjectVersion
        # and returns its version id as the task result.
        assert ObjectVersion.query.count() == 1
        obj = ObjectVersion.query.first()
        assert obj.key == 'test.pdf'
        assert str(obj.version_id) == task.result
        assert obj.file
        assert obj.file.size == file_size
        assert Bucket.get(bid).size == file_size
        assert FileInstance.query.count() == 1

        # Undo
        DownloadTask().clean(version_id=obj.version_id)

        # clean() removes the ObjectVersion and resets the bucket size,
        # but the FileInstance row itself is left behind.
        assert ObjectVersion.query.count() == 0
        assert FileInstance.query.count() == 1
        assert Bucket.get(bid).size == 0
def test_bucket_sync(app, db, dummy_location):
    """Test that a bucket is correctly synced."""
    src = Bucket.create()
    dest = Bucket.create()
    # Three objects in the source bucket, one extra in the destination.
    for key, uri in [("filename1", "b1v11"),
                     ("filename2", "b1v12"),
                     ("filename3", "b1v13")]:
        ObjectVersion.create(src, key).set_location(uri, 1, "achecksum")
    ObjectVersion.create(dest, "extra1").set_location("b2v11", 1, "achecksum")
    db.session.commit()

    src.sync(dest)

    # A plain sync copies src objects over but keeps dest extras.
    assert ObjectVersion.get_by_bucket(src).count() == 3
    assert ObjectVersion.get_by_bucket(dest).count() == 4

    ObjectVersion.delete(src, "filename1")
    ObjectVersion.create(dest, "extra2").set_location("b2v12", 1, "achecksum")
    ObjectVersion.create(dest, "extra3").set_location("b2v13", 1, "achecksum")
    ObjectVersion.delete(dest, "extra3")
    db.session.commit()

    src.sync(dest, delete_extras=True)

    # With delete_extras the destination mirrors the source exactly.
    assert ObjectVersion.get_by_bucket(src).count() == 2
    assert ObjectVersion.get_by_bucket(dest).count() == 2
def test_bucket_sync_deleted(app, db, dummy_location):
    """Test bucket sync of a deleted bucket."""
    source = Bucket.create()
    source.deleted = True
    db.session.commit()

    # Syncing from a soft-deleted bucket must raise, and the raised
    # error must carry a non-empty body.
    with pytest.raises(InvalidOperationError) as exc_info:
        source.sync(Bucket.create())
    assert exc_info.value.get_body() != {}
# Example #4
 def remove_bucket(self, force=False):
     """Remove the record's bucket.

     :param force: if True, permanently remove the bucket; otherwise
         only mark it as deleted via ``Bucket.delete``.
     """
     if self.bucket:
         bucket = self.bucket
         # Detach the bucket from the record before removing it.
         self.unset_bucket()
         # TODO: not sure this makes sense???
         if force:
             bucket.remove()
         else:
             # Soft delete: the bucket row is kept but flagged as deleted.
             Bucket.delete(bucket.id)
def test_object_set_contents(app, db, dummy_location):
    """Test object set contents.

    Covers: attaching file contents, overwrite protection, new versions,
    bucket size accounting, and the checksum-verification state fields
    (``last_check`` / ``last_check_at``).
    """
    with db.session.begin_nested():
        b1 = Bucket.create()
        obj = ObjectVersion.create(b1, "LICENSE")
        assert obj.file_id is None
        assert FileInstance.query.count() == 0

        # Save a file.
        with open('LICENSE', 'rb') as fp:
            obj.set_contents(fp)

    # Assert size, location and checksum
    assert obj.file_id is not None
    assert obj.file.uri is not None
    assert obj.file.size == getsize('LICENSE')
    assert obj.file.checksum is not None
    assert b1.size == obj.file.size

    # Try to overwrite
    # An ObjectVersion's file may only be set once.
    with db.session.begin_nested():
        with open('LICENSE', 'rb') as fp:
            pytest.raises(FileInstanceAlreadySetError, obj.set_contents, fp)

    # Save a new version with different content
    with db.session.begin_nested():
        obj2 = ObjectVersion.create(b1, "LICENSE")
        with open('README.rst', 'rb') as fp:
            obj2.set_contents(fp)

    assert obj2.file_id is not None and obj2.file_id != obj.file_id
    assert obj2.file.size == getsize('README.rst')
    assert obj2.file.uri != obj.file.uri
    # Bucket size accumulates across versions of the same key.
    assert Bucket.get(b1.id).size == obj.file.size + obj2.file.size

    obj2.file.verify_checksum()
    assert obj2.file.last_check_at
    assert obj2.file.last_check is True
    old_checksum = obj2.file.checksum
    # A corrupted stored checksum makes verification report False.
    obj2.file.checksum = "md5:invalid"
    assert obj2.file.verify_checksum() is False

    previous_last_check = obj2.file.last_check
    previous_last_check_date = obj2.file.last_check_at
    with db.session.begin_nested():
        obj2.file.checksum = old_checksum
        obj2.file.uri = 'invalid'
    # With throws enabled (default) the check fails hard and the
    # last_check bookkeeping is left untouched.
    pytest.raises(ResourceNotFoundError, obj2.file.verify_checksum)
    assert obj2.file.last_check == previous_last_check
    assert obj2.file.last_check_at == previous_last_check_date

    # With throws=False the failure is recorded as last_check=None.
    obj2.file.verify_checksum(throws=False)
    assert obj2.file.last_check is None
    assert obj2.file.last_check_at != previous_last_check_date
def test_object_set_contents(app, db, dummy_location):
    """Test object set contents.

    NOTE(review): byte-identical duplicate of an earlier definition in
    this file; being defined later, this copy shadows the earlier one.
    """
    with db.session.begin_nested():
        b1 = Bucket.create()
        obj = ObjectVersion.create(b1, "LICENSE")
        assert obj.file_id is None
        assert FileInstance.query.count() == 0

        # Save a file.
        with open('LICENSE', 'rb') as fp:
            obj.set_contents(fp)

    # Assert size, location and checksum
    assert obj.file_id is not None
    assert obj.file.uri is not None
    assert obj.file.size == getsize('LICENSE')
    assert obj.file.checksum is not None
    assert b1.size == obj.file.size

    # Try to overwrite
    # Setting contents twice on the same ObjectVersion is rejected.
    with db.session.begin_nested():
        with open('LICENSE', 'rb') as fp:
            pytest.raises(FileInstanceAlreadySetError, obj.set_contents, fp)

    # Save a new version with different content
    with db.session.begin_nested():
        obj2 = ObjectVersion.create(b1, "LICENSE")
        with open('README.rst', 'rb') as fp:
            obj2.set_contents(fp)

    assert obj2.file_id is not None and obj2.file_id != obj.file_id
    assert obj2.file.size == getsize('README.rst')
    assert obj2.file.uri != obj.file.uri
    # Bucket size accumulates across versions of the same key.
    assert Bucket.get(b1.id).size == obj.file.size + obj2.file.size

    obj2.file.verify_checksum()
    assert obj2.file.last_check_at
    assert obj2.file.last_check is True
    old_checksum = obj2.file.checksum
    obj2.file.checksum = "md5:invalid"
    assert obj2.file.verify_checksum() is False

    previous_last_check = obj2.file.last_check
    previous_last_check_date = obj2.file.last_check_at
    with db.session.begin_nested():
        obj2.file.checksum = old_checksum
        obj2.file.uri = 'invalid'
    # A hard failure (throws=True default) leaves bookkeeping untouched.
    pytest.raises(ResourceNotFoundError, obj2.file.verify_checksum)
    assert obj2.file.last_check == previous_last_check
    assert obj2.file.last_check_at == previous_last_check_date

    # throws=False records the failure as last_check=None instead.
    obj2.file.verify_checksum(throws=False)
    assert obj2.file.last_check is None
    assert obj2.file.last_check_at != previous_last_check_date
 def post_delete(self, record, force=False):
     """Called after a record is deleted.

     Removes the record's bucket: permanently when ``force`` is True,
     otherwise via a soft ``Bucket.delete``.
     """
     if self._delete:
         files = getattr(record, self.attr_name)
         if files is not None:
             if record.bucket:
                 bucket = record.bucket
                 if force:
                     # Detach before destroying so the record no longer
                     # references the removed bucket.
                     record.bucket = None
                     record.bucket_id = None
                     bucket.remove()
                 else:
                     Bucket.delete(bucket.id)
def test_bucket_sync_new_object(app, db, dummy_location):
    """Test that a new file in src in synced to dest."""
    src, dest = Bucket.create(), Bucket.create()
    ObjectVersion.create(src, "filename").set_location("b1v1", 1, "achecksum")
    db.session.commit()

    # Before syncing, only the source bucket holds the object.
    assert ObjectVersion.get_by_bucket(src).count() == 1
    assert ObjectVersion.get_by_bucket(dest).count() == 0
    src.sync(dest)
    # After syncing, both buckets expose the same key.
    assert ObjectVersion.get_by_bucket(src).count() == 1
    assert ObjectVersion.get_by_bucket(dest).count() == 1
    assert ObjectVersion.get(dest, "filename")
# Example #9
def test_object_snapshot_deleted(app, db, dummy_location):
    """Deleted bucket."""
    live = Bucket.create()
    removed = Bucket.create()
    removed.deleted = True
    db.session.commit()

    # Snapshotting a live bucket yields a fresh, unlocked copy.
    snap = live.snapshot()
    assert snap.id != live.id
    assert snap.locked is False

    # Snapshotting the soft-deleted bucket is forbidden.
    pytest.raises(InvalidOperationError, removed.snapshot)
# Example #10
def test_object_snapshot_deleted(app, db, dummy_location):
    """Deleted bucket.

    NOTE(review): duplicate of an earlier definition in this file; the
    later definition shadows the earlier one.
    """
    b1 = Bucket.create()
    b2 = Bucket.create()
    b2.deleted = True
    db.session.commit()

    # Snapshot of a live bucket: new id, unlocked.
    b3 = b1.snapshot()
    assert b3.id != b1.id
    assert b3.locked is False

    # b2 is deleted.
    pytest.raises(InvalidOperationError, b2.snapshot)
# Example #11
def files():
    """Load files.

    Demo-data loader: wipes the data directory and all file-related
    tables, then creates a default Location and three fixed-id buckets
    populated from files in the source tree.
    """
    srcroot = dirname(dirname(__file__))
    d = current_app.config['DATADIR']
    if exists(d):
        shutil.rmtree(d)
    makedirs(d)

    # Clear data (delete in dependency order so FK constraints hold).
    Part.query.delete()
    MultipartObject.query.delete()
    ObjectVersion.query.delete()
    Bucket.query.delete()
    FileInstance.query.delete()
    Location.query.delete()
    db.session.commit()

    # Create location
    loc = Location(name='local', uri=d, default=True)
    db.session.add(loc)
    db.session.commit()

    # Bucket 0
    b1 = Bucket.create(loc)
    b1.id = '00000000-0000-0000-0000-000000000000'
    for f in ['README.rst', 'LICENSE']:
        with open(join(srcroot, f), 'rb') as fp:
            ObjectVersion.create(b1, f, stream=fp)

    # Bucket 1
    # Two versions are stored under each key to exercise versioning.
    b2 = Bucket.create(loc)
    b2.id = '11111111-1111-1111-1111-111111111111'
    k = 'AUTHORS.rst'
    with open(join(srcroot, 'CHANGES.rst'), 'rb') as fp:
        ObjectVersion.create(b2, k, stream=fp)
    with open(join(srcroot, 'AUTHORS.rst'), 'rb') as fp:
        ObjectVersion.create(b2, k, stream=fp)

    k = 'RELEASE-NOTES.rst'
    with open(join(srcroot, 'RELEASE-NOTES.rst'), 'rb') as fp:
        ObjectVersion.create(b2, k, stream=fp)
    with open(join(srcroot, 'CHANGES.rst'), 'rb') as fp:
        ObjectVersion.create(b2, k, stream=fp)
    # This key ends with a delete marker as its head version.
    ObjectVersion.delete(b2.id, k)

    # Bucket 2 (intentionally left empty)
    b2 = Bucket.create(loc)
    b2.id = '22222222-2222-2222-2222-222222222222'

    db.session.commit()
# Example #12
def test_bucket_retrieval(app, db, dummy_location):
    """Test bucket get/create."""
    # Create two buckets
    with db.session.begin_nested():
        first = Bucket.create()
        Bucket.create()

    assert Bucket.all().count() == 2

    # Soft-delete one bucket; Bucket.all() must then skip it.
    with db.session.begin_nested():
        first.deleted = True

    assert Bucket.all().count() == 1
# Example #13
def test_bucket_sync_delete_extras(app, db, dummy_location):
    """Test that an extra object in dest is deleted when syncing."""
    src, dest = Bucket.create(), Bucket.create()
    ObjectVersion.create(src, "filename").set_location("b1v1", 1, "achecksum")
    ObjectVersion.create(dest, "filename").set_location("b2v1", 1, "achecksum")
    ObjectVersion.create(dest, "extra-deleted").set_location("b3v1", 1, "asum")
    db.session.commit()

    src.sync(dest, delete_extras=True)

    # The object present only in dest must have been removed by the sync.
    assert ObjectVersion.get_by_bucket(src).count() == 1
    assert ObjectVersion.get_by_bucket(dest).count() == 1
    assert not ObjectVersion.get(dest, "extra-deleted")
# Example #14
def test_bucket_retrieval(app, db, dummy_location):
    """Test bucket get/create.

    NOTE(review): byte-identical duplicate of an earlier definition in
    this file; the later definition shadows the earlier one.
    """
    # Create two buckets
    with db.session.begin_nested():
        b1 = Bucket.create()
        Bucket.create()

    assert Bucket.all().count() == 2

    # Mark one as deleted.
    with db.session.begin_nested():
        b1.deleted = True

    # Bucket.all() excludes soft-deleted buckets.
    assert Bucket.all().count() == 1
# Example #15
def files():
    """Load files.

    Demo-data loader: wipes the data directory and all file-related
    tables, then creates a default Location and three fixed-id buckets
    populated from files in the source tree.
    """
    srcroot = dirname(dirname(__file__))
    d = current_app.config['DATADIR']
    if exists(d):
        shutil.rmtree(d)
    makedirs(d)

    # Clear data (delete in dependency order so FK constraints hold).
    Part.query.delete()
    MultipartObject.query.delete()
    ObjectVersion.query.delete()
    Bucket.query.delete()
    FileInstance.query.delete()
    Location.query.delete()
    db.session.commit()

    # Create location
    loc = Location(name='local', uri=d, default=True)
    # Fix: the Location was never added to the session, so the commit
    # below would not persist it.
    db.session.add(loc)
    db.session.commit()

    # Bucket 0
    b1 = Bucket.create(loc)
    b1.id = '00000000-0000-0000-0000-000000000000'
    for f in ['README.rst', 'LICENSE']:
        with open(join(srcroot, f), 'rb') as fp:
            ObjectVersion.create(b1, f, stream=fp)

    # Bucket 1
    # Two versions are stored under each key to exercise versioning.
    b2 = Bucket.create(loc)
    b2.id = '11111111-1111-1111-1111-111111111111'
    k = 'AUTHORS.rst'
    with open(join(srcroot, 'CHANGES.rst'), 'rb') as fp:
        ObjectVersion.create(b2, k, stream=fp)
    with open(join(srcroot, 'AUTHORS.rst'), 'rb') as fp:
        ObjectVersion.create(b2, k, stream=fp)

    k = 'RELEASE-NOTES.rst'
    with open(join(srcroot, 'RELEASE-NOTES.rst'), 'rb') as fp:
        ObjectVersion.create(b2, k, stream=fp)
    with open(join(srcroot, 'CHANGES.rst'), 'rb') as fp:
        ObjectVersion.create(b2, k, stream=fp)
    # This key ends with a delete marker as its head version.
    ObjectVersion.delete(b2.id, k)

    # Bucket 2 (intentionally left empty)
    b2 = Bucket.create(loc)
    b2.id = '22222222-2222-2222-2222-222222222222'

    db.session.commit()
# Example #16
def test_object_snapshot_deleted(app, db, dummy_location):
    """Test snapshot creation of a deleted bucket.

    NOTE(review): third definition with this name in the file; the last
    one defined is the one pytest collects.
    """
    b1 = Bucket.create()
    b2 = Bucket.create()
    b2.deleted = True
    db.session.commit()

    # Snapshot of a live bucket: new id, unlocked.
    b3 = b1.snapshot()
    assert b3.id != b1.id
    assert b3.locked is False

    # b2 is deleted.
    with pytest.raises(InvalidOperationError) as excinfo:
        b2.snapshot()
    # The error payload must not be empty.
    assert excinfo.value.get_body() != {}
def test_transfer_cp(db):
    """Test factories.transfer_cp function.

    Creates a record with one attached file, runs ``transfer_cp`` and
    checks the file was copied to the accessioned directory.
    """
    # first we create a record
    recid = uuid.uuid4()
    PersistentIdentifier.create(
        'recid',
        '1337',
        object_type='rec',
        object_uuid=recid,
        status=PIDStatus.REGISTERED)
    record = Record.create({'title': 'record test'}, recid)
    # we setup a file storage
    tmppath = tempfile.mkdtemp()
    db.session.add(Location(name='default', uri=tmppath, default=True))
    db.session.commit()
    # we add a file to the record
    bucket = Bucket.create()
    content = b'Aaah! A headcrab!!!\n'
    record_buckets = RecordsBuckets.create(record=record.model, bucket=bucket)
    record.files['crab.txt'] = BytesIO(content)
    # test!
    rec_dir = join(tmppath, create_accessioned_id('1337', 'recid'))
    factories.transfer_cp(record.id, tmppath)
    assert isdir(rec_dir)
    assert isfile(join(rec_dir, 'crab.txt'))
    # Fix: open in binary mode — `content` is bytes; a text-mode read
    # returns str, so the equality below could never hold on Python 3.
    with open(join(rec_dir, 'crab.txt'), "rb") as f:
        assert f.read() == content
    # finalization
    rmtree(tmppath)
# Example #18
def test_SIP_files(db):
    """Test the files methods of API SIP.

    Creates a SIP, attaches one ObjectVersion and checks the file is
    visible through both the API wrapper and the underlying model.
    """
    # we create a SIP model
    sip = SIP_.create()
    db.session.commit()
    # We create an API SIP on top of it
    api_sip = SIP(sip)
    assert len(api_sip.files) == 0
    # we setup a file storage
    tmppath = tempfile.mkdtemp()
    db.session.add(Location(name='default', uri=tmppath, default=True))
    db.session.commit()
    # we create a file
    content = b'test lol\n'
    bucket = Bucket.create()
    obj = ObjectVersion.create(bucket, 'test.txt', stream=BytesIO(content))
    db.session.commit()
    # we attach it to the SIP
    sf = api_sip.attach_file(obj)
    db.session.commit()
    assert len(api_sip.files) == 1
    assert api_sip.files[0].filepath == 'test.txt'
    assert sip.sip_files[0].filepath == 'test.txt'
    # finalization
    rmtree(tmppath)
def test_pyfilesystemstorage(app, db, dummy_location):
    """Test pyfs storage.

    Saves an in-memory stream via PyFilesystemStorage and verifies the
    returned checksum, size and storage location, plus that the progress
    callback is invoked.
    """
    # Create bucket and object
    with db.session.begin_nested():
        b1 = Bucket.create()
        obj = ObjectVersion.create(b1, "LICENSE")
        obj.file = FileInstance.create()

    storage = PyFilesystemStorage(obj.file, base_uri=obj.bucket.location.uri)
    # Mutable cell so the nested callback can record the last seen size.
    counter = dict(size=0)

    def callback(total, size):
        counter['size'] = size

    # b() converts the literal to bytes (presumably six.b — TODO confirm).
    data = b("this is some content")
    stream = BytesIO(data)
    loc, size, checksum = storage.save(stream, progress_callback=callback)

    # Verify checksum, size and location.
    m = hashlib.md5()
    m.update(data)
    assert "md5:{0}".format(m.hexdigest()) == checksum

    assert size == len(data)
    # Files are stored under <location>/<file-instance-id>/data.
    assert loc == join(
        dummy_location.uri,
        str(obj.file.id),
        "data")
def test_pyfilesystemstorage(app, db, dummy_location):
    """Test pyfs storage.

    NOTE(review): second definition with this name in the file — this
    later copy shadows the earlier one under pytest collection.
    """
    # Create bucket and object
    with db.session.begin_nested():
        b = Bucket.create()
        obj = ObjectVersion.create(b, "LICENSE")
        obj.file = FileInstance()
        db.session.add(obj.file)

    storage = PyFilesystemStorage(obj, obj.file)
    with open('LICENSE', 'rb') as fp:
        loc, size, checksum = storage.save(fp)

    # Verify checksum, size and location.
    with open('LICENSE', 'rb') as fp:
        m = hashlib.md5()
        m.update(fp.read())
        assert "md5:{0}".format(m.hexdigest()) == checksum

    # Fix: removed an accidentally duplicated size assertion.
    assert size == getsize('LICENSE')
    # Files are stored under <location>/<bucket-id>/<version-id>/data.
    assert loc == \
        join(
            dummy_location.uri,
            str(b.id),
            str(obj.version_id),
            "data")
def test_b2share_storage_with_pid(base_app, app, tmp_location, login_user,
                                  test_users):
    """Check that the storage class will redirect pid files."""
    pid = 'http://hdl.handle.net/11304/74c66f0b-f814-4202-9dcb-4889ba9b1047'
    with app.app_context():
        # Disable access control for this test
        tmp_location = Location.query.first()
        with db.session.begin_nested():
            # storage_class 'B' marks the file as a PID redirect.
            bucket = Bucket.create(tmp_location, storage_class='B')
            pid_file = FileInstance.create()
            pid_file.set_uri(pid, 1, 0, storage_class='B')
            ObjectVersion.create(bucket, 'test.txt', pid_file.id)

        db.session.commit()
        url = url_for('invenio_files_rest.object_api',
                      bucket_id=bucket.id,
                      key='test.txt')
    try:
        with app.app_context():
            # Save the original permission factory so it can be restored
            # in the finally block, then allow everything.
            permission = current_files_rest.permission_factory
            current_files_rest.permission_factory = allow_all
        # Check that accessing the file redirects to the PID
        with app.test_client() as client:
            resp = client.get(url)
            assert resp.headers['Location'] == pid
            assert resp.status_code == 302
    finally:
        with app.app_context():
            current_files_rest.permission_factory = permission
    def create(cls, data, id_=None):
        """Create a deposit.

        Adds bucket creation immediately on deposit creation.
        Derives the experiment from the deposit group matching the
        record's ``$schema`` before delegating to the parent ``create``.
        """
        bucket = Bucket.create(
            default_location=Location.get_default()
        )

        try:
            # Take the part of $schema after '/schemas/'.
            schema = data.get("$schema", None) \
                .split('/schemas/', 1)[1]
        except (IndexError, AttributeError):
            # NOTE(review): returning None here means no deposit (and an
            # orphaned bucket) when $schema is missing/malformed — confirm
            # this is intended.
            return None

        if schema:
            # Find the first deposit group whose schema matches.
            # NOTE(review): .iteritems() is Python 2 only.
            _deposit_group = \
                next(
                    (depgroup
                     for dg, depgroup
                     in current_app.config.get('DEPOSIT_GROUPS').iteritems()
                     if schema in depgroup['schema']
                     ),
                    None
                )

            # NOTE(review): _deposit_group may be None here, which would
            # raise AttributeError — verify all schemas have a group.
            data["_experiment"] = _deposit_group.get("experiment", "Unknown")

        deposit = super(CAPDeposit, cls).create(data, id_=id_)

        add_owner_permissions(deposit.id)
        # Link the new bucket to the deposit record.
        RecordsBuckets.create(record=deposit.model, bucket=bucket)
        return deposit
# Example #23
def test_verify_checksum(app, db, dummy_location):
    """Test celery tasks for checksum verification.

    Exercises the ``throws`` and ``pessimistic`` flags of the
    ``verify_checksum`` task and their effect on ``last_check``.
    """
    b1 = Bucket.create()
    with open('README.rst', 'rb') as fp:
        obj = ObjectVersion.create(b1, 'README.rst', stream=fp)
    db.session.commit()
    file_id = obj.file_id

    verify_checksum(str(file_id))

    f = FileInstance.query.get(file_id)
    assert f.last_check_at
    assert f.last_check is True

    # Point the file at a non-existent URI to force check failures.
    f.uri = 'invalid'
    db.session.add(f)
    db.session.commit()
    # throws=True: the task raises and last_check stays untouched.
    pytest.raises(ResourceNotFoundError, verify_checksum, str(file_id),
                  throws=True)

    f = FileInstance.query.get(file_id)
    assert f.last_check is True

    # throws=False: the failure is swallowed and recorded as None.
    verify_checksum(str(file_id), throws=False)
    f = FileInstance.query.get(file_id)
    assert f.last_check is None

    f.last_check = True
    db.session.add(f)
    db.session.commit()
    # pessimistic=True: last_check is reset before checking, and the
    # error still propagates.
    with pytest.raises(ResourceNotFoundError):
        verify_checksum(str(file_id), pessimistic=True)
    f = FileInstance.query.get(file_id)
    assert f.last_check is None
# Example #24
def attach_files(obj, eng):
    """Attach files listed in the workflow object to the existing record.

    Downloads each file over HTTP (or opens it locally), stores it on the
    record and commits. Halts the workflow if no files are present.
    """
    if 'files' in obj.extra_data:
        recid = obj.data['control_number']
        pid = PersistentIdentifier.get('recid', recid)
        existing_record = Record.get_record(pid.object_uuid)

        # Create a bucket only if the record has no files yet.
        if '_files' not in existing_record or not existing_record['_files']:
            bucket = Bucket.create()
            RecordsBuckets.create(record=existing_record.model, bucket=bucket)

        for file_ in obj.extra_data['files']:
            if file_['url'].startswith('http'):
                # NOTE(review): urllib2 is Python 2 only.
                request = urllib2.Request(file_['url'],
                                          headers=file_.get('headers', {}))
                f = urllib2.urlopen(request)
            else:
                f = open(file_['url'])

            existing_record.files[file_['name']] = f
            existing_record.files[
                file_['name']]['filetype'] = file_['filetype']

        obj.save()
        existing_record.commit()
        db.session.commit()
    else:
        # NOTE(review): other variants of this function in the file call
        # __halt_and_notify(msg, eng) without `obj` — confirm signature.
        __halt_and_notify('No files found.', obj, eng)
def test_SIP_files(db):
    """Test the files methods of API SIP.

    NOTE(review): byte-identical duplicate of an earlier definition in
    this file; the later definition shadows the earlier one.
    """
    # we create a SIP model
    sip = SIP_.create()
    db.session.commit()
    # We create an API SIP on top of it
    api_sip = SIP(sip)
    assert len(api_sip.files) == 0
    # we setup a file storage
    tmppath = tempfile.mkdtemp()
    db.session.add(Location(name='default', uri=tmppath, default=True))
    db.session.commit()
    # we create a file
    content = b'test lol\n'
    bucket = Bucket.create()
    obj = ObjectVersion.create(bucket, 'test.txt', stream=BytesIO(content))
    db.session.commit()
    # we attach it to the SIP
    sf = api_sip.attach_file(obj)
    db.session.commit()
    assert len(api_sip.files) == 1
    assert api_sip.files[0].filepath == 'test.txt'
    assert sip.sip_files[0].filepath == 'test.txt'
    # finalization
    rmtree(tmppath)
def test_b2share_storage_with_pid(base_app, app, tmp_location, login_user, test_users):
    """Check that the storage class will redirect pid files.

    NOTE(review): near-duplicate of an earlier definition in this file;
    the later definition shadows the earlier one.
    """
    pid = 'http://hdl.handle.net/11304/74c66f0b-f814-4202-9dcb-4889ba9b1047'
    with app.app_context():
        # Disable access control for this test
        tmp_location = Location.query.first()
        with db.session.begin_nested():
            # storage_class 'B' marks the file as a PID redirect.
            bucket = Bucket.create(tmp_location, storage_class='B')
            pid_file = FileInstance.create()
            pid_file.set_uri(pid, 1, 0, storage_class='B')
            ObjectVersion.create(bucket, 'test.txt', pid_file.id)

        db.session.commit()
        url = url_for('invenio_files_rest.object_api',
                        bucket_id=bucket.id,
                        key='test.txt')
    try:
        with app.app_context():
            # Remember the original factory so the finally block restores it.
            permission = current_files_rest.permission_factory
            current_files_rest.permission_factory = allow_all
        # Check that accessing the file redirects to the PID
        with app.test_client() as client:
            resp = client.get(url)
            assert resp.headers['Location'] == pid
            assert resp.status_code == 302
    finally:
        with app.app_context():
            current_files_rest.permission_factory = permission
# Example #27
def test_admin_views(app, db, dummy_location):
    """Test admin views.

    Checks that each files-REST admin list view renders and contains the
    identifier of the fixture row.
    """
    app.config['SECRET_KEY'] = 'CHANGEME'
    # view_class_factory=identity disables the admin permission wrapper.
    InvenioAdmin(app, permission_factory=None, view_class_factory=lambda x: x)

    b1 = Bucket.create(location=dummy_location)
    obj = ObjectVersion.create(b1, 'test').set_location('placeuri', 1, 'chk')
    db.session.commit()

    with app.test_client() as client:
        res = client.get('/admin/bucket/')
        assert res.status_code == 200
        assert str(b1.id) in res.get_data(as_text=True)

        res = client.get('/admin/fileinstance/')
        assert res.status_code == 200
        assert str(obj.file_id) in res.get_data(as_text=True)

        res = client.get('/admin/location/')
        assert res.status_code == 200
        assert str(b1.location.name) in res.get_data(as_text=True)

        res = client.get('/admin/objectversion/')
        assert res.status_code == 200
        assert str(obj.version_id) in res.get_data(as_text=True)
# Example #28
def test_object_delete(app, db, dummy_location):
    """Test object creation.

    Verifies delete-marker semantics: a deleted key is hidden from the
    default getters but its versions remain queryable.
    """
    # Create three versions, with latest being a delete marker.
    with db.session.begin_nested():
        b1 = Bucket.create()
        ObjectVersion.create(b1, "test").set_location(
            "b1test1", 1, "achecksum")
        ObjectVersion.create(b1, "test").set_location(
            "b1test2", 1, "achecksum")
        obj_deleted = ObjectVersion.delete(b1, "test")

    # All three versions (incl. the delete marker) exist as rows,
    # but the head of the key is deleted so default lookups hide it.
    assert ObjectVersion.query.count() == 3
    assert ObjectVersion.get(b1, "test") is None
    assert ObjectVersion.get_by_bucket(b1).count() == 0

    obj = ObjectVersion.get(b1, "test", version_id=obj_deleted.version_id)
    assert obj.deleted
    assert obj.file_id is None

    # A new version on top of the delete marker makes the key visible again.
    ObjectVersion.create(b1, "test").set_location(
        "b1test4", 1, "achecksum")

    assert ObjectVersion.query.count() == 4
    assert ObjectVersion.get(b1.id, "test") is not None
    assert ObjectVersion.get_by_bucket(b1.id).count() == 1
# Example #29
def attach_files(obj, eng):
    """Attach files listed in the workflow object to the existing record.

    Variant using a retrying requests session; halts the workflow on a
    non-200 download or when no files are present.
    """
    if 'files' in obj.extra_data:
        recid = obj.data['control_number']
        pid = PersistentIdentifier.get('recid', recid)
        existing_record = Record.get_record(pid.object_uuid)

        # Create a bucket only if the record has no files yet.
        if '_files' not in existing_record or not existing_record['_files']:
            bucket = Bucket.create()
            RecordsBuckets.create(record=existing_record.model, bucket=bucket)

        for file_ in obj.extra_data['files']:
            if file_['url'].startswith('http'):
                headers = file_.get('headers', {})
                data = requests_retry_session().get(file_['url'],
                                                    headers=headers)

                if data.status_code != 200:
                    __halt_and_notify(
                        "Error during acquiring files.\nHTTP status: %d\nUrl: %s\nHeaders:%s"
                        % (data.status_code, file_['url'], headers), eng)

                # NOTE(review): StringIO over response bytes — Python 2
                # era code; on Python 3 this would need BytesIO.
                f = StringIO(data.content)
            else:
                f = open(file_['url'])

            existing_record.files[file_['name']] = f
            existing_record.files[
                file_['name']]['filetype'] = file_['filetype']

        obj.save()
        existing_record.commit()
        db.session.commit()
    else:
        __halt_and_notify('No files found.', eng)
# Example #30
    def create(cls, data, id_=None, **kwargs):
        """Create a CDS deposit.

        Adds bucket creation immediately on deposit creation.

        :param data: deposit metadata; defaults are filled in here.
        :param id_: optional record UUID; minted if '_deposit' missing.
        :param kwargs: may carry 'bucket_location' (defaults to 'default').
        """
        if '_deposit' not in data:
            id_ = id_ or uuid.uuid4()
            cls.deposit_minter(id_, data)
        bucket = Bucket.create(location=Location.get_by_name(
            kwargs.get('bucket_location', 'default')))
        data['_buckets'] = {'deposit': str(bucket.id)}
        # Seed default metadata fields expected downstream.
        data.setdefault('_cds', {})
        data['_cds'].setdefault('state', {})
        data.setdefault('keywords', [])
        data.setdefault('license', [{
            'license': 'CERN',
            'material': '',
            'url': 'http://copyright.web.cern.ch',
        }])
        if '_access' not in data:
            data.setdefault('_access', {})
        deposit = super(CDSDeposit,
                        cls).create(data,
                                    id_=id_,
                                    validator=PartialDraft4Validator)
        # Link the new bucket to the deposit record.
        RecordsBuckets.create(record=deposit.model, bucket=bucket)
        return deposit
# Example #31
def test_scheduling(app, test_communities, login_user):
    """Test that scheduling files happens properly.

    Checks both the bulk scheduler and the failed-files rescheduler.
    """
    with app.app_context():
        b1 = Bucket.create()
        objects = []
        for i in range(10):
            objects.append(
                ObjectVersion.create(b1, str(i), stream=BytesIO(b'test')))
        db.session.commit()

        # corrupt 1 file
        corrupted_file = objects[0].file
        with open(corrupted_file.uri, 'w') as file_writer:
            file_writer.write('modified content')

        # schedule all files
        schedule_all_files_for_checksum()

        # assert that all will be checked
        # (the corrupted one fails its check, hence falsy last_check)
        assert not corrupted_file.last_check
        for o in objects[1:]:
            assert o.file.last_check

        # make 1 file fail
        failed_file = objects[1].file
        failed_file.last_check = None

        # schedule all failed
        schedule_failed_checksum_files()
        # assert that 1 wiil run again
        # (only files with last_check=None are rescheduled; the
        # corrupted one stays failed)
        assert failed_file.last_check
        assert not corrupted_file.last_check
# Example #32
def test_verify_checksum(app, tmp_location):
    """Test that verify_checksum sets last_check=False
       if the checksum is different and last_check=None
       if it failed to calculate the checksum, e.g. an exception was raised."""
    with app.app_context():
        b1 = Bucket.create(tmp_location)
        objects = []
        for i in range(10):
            objects.append(
                ObjectVersion.create(b1, str(i), stream=BytesIO(b'test')))
        db.session.commit()

        # Run the celery task eagerly for each file; all should pass.
        for obj in objects:
            verify_checksum.apply([str(obj.file_id)])
            assert obj.file.last_check

        # assert that mismatches in md5 checksums are caught
        corrupted_file = objects[0].file
        with open(corrupted_file.uri, 'w') as file_writer:
            file_writer.write('modified content')

        verify_checksum.apply([str(corrupted_file.id)])
        assert corrupted_file.last_check is False

        # assert that when exceptions occur last_check=None
        # (checksum computation is patched to raise)
        failed_file = objects[1].file
        with patch.object(FileStorage,
                          'checksum') \
                as mock_check:
            mock_check.side_effect = KeyError()
            verify_checksum.apply_async(args=[str(failed_file.id)],
                                        kwargs={'throws': False})
            assert failed_file.last_check is None
# Example #33
def put_file_into_bucket(bucket_id, key, stream, content_length):
    """Store a stream as a new object version in the given bucket.

    :param bucket_id: id of an existing bucket (404s if missing).
    :param key: object key to create.
    :param stream: readable stream with the file contents.
    :param content_length: declared size, checked against the bucket's
        size limit.
    :returns: the created ObjectVersion.
    """
    # TODO: refactor invenio_files_rest to have a proper API and use that one here
    from invenio_db import db
    from invenio_files_rest.models import Bucket, ObjectVersion
    from invenio_files_rest.views import need_bucket_permission
    from invenio_files_rest.errors import FileSizeError

    bucket = Bucket.get(bucket_id)
    if bucket is None:
        abort(404, 'Bucket does not exist.')

    # WARNING: this function should be isomorphic with
    #          invenio_files_rest.views:ObjectResource.create_object
    @need_bucket_permission('bucket-update')
    def create_object(bucket, key):
        size_limit = bucket.size_limit
        # `content_length or 0` guards against a None length.
        if size_limit and int(content_length or 0) > size_limit:
            desc = 'File size limit exceeded.' \
                if isinstance(size_limit, int) else size_limit.reason
            raise FileSizeError(description=desc)

        with db.session.begin_nested():
            obj = ObjectVersion.create(bucket, key)
            obj.set_contents(
                stream, size=content_length, size_limit=size_limit)
        db.session.commit()
        return obj

    return create_object(key=key, bucket=bucket)
# Example #34
    def create(cls, data, id_=None):
        """Create a deposit.

        Adds bucket creation immediately on deposit creation, and links
        the deposit into the PID versioning graph.
        """
        bucket = Bucket.create(
            quota_size=current_app.config['ZENODO_BUCKET_QUOTA_SIZE'],
            max_file_size=current_app.config['ZENODO_MAX_FILE_SIZE'],
        )
        data['_buckets'] = {'deposit': str(bucket.id)}
        deposit = super(ZenodoDeposit, cls).create(data, id_=id_)

        RecordsBuckets.create(record=deposit.model, bucket=bucket)

        # Resolve the PIDs this deposit participates in.
        recid = PersistentIdentifier.get(
            'recid', str(data['recid']))
        conceptrecid = PersistentIdentifier.get(
            'recid', str(data['conceptrecid']))
        depid = PersistentIdentifier.get(
            'depid', str(data['_deposit']['id']))

        # Register the new recid as a draft child of the concept recid
        # and link the deposit PID to it.
        PIDVersioning(parent=conceptrecid).insert_draft_child(child=recid)
        RecordDraft.link(recid, depid)

        return deposit
# Example #35
def attach_files(obj, eng):
    """Attach the files listed in the workflow object's extra data.

    Resolves the existing record through its ``control_number`` PID,
    ensures the record has a bucket, then stores each file (downloaded
    over HTTP or opened from disk) on the record. Halts the workflow
    when no files are listed.
    """
    from io import BytesIO

    if 'files' not in obj.extra_data:
        __halt_and_notify('No files found.', eng)
        return

    recid = obj.data['control_number']
    pid = PersistentIdentifier.get('recid', recid)
    existing_record = Record.get_record(pid.object_uuid)

    # Create a bucket only when the record does not have one yet.
    if '_files' not in existing_record or not existing_record['_files']:
        bucket = Bucket.create()
        RecordsBuckets.create(record=existing_record.model, bucket=bucket)

    for file_ in obj.extra_data['files']:
        if file_['url'].startswith('http'):
            data = requests_retry_session().get(
                file_['url'], headers=file_.get('headers', {}))
            # BUGFIX: ``data.content`` is bytes; wrapping it in StringIO
            # breaks on Python 3. Use BytesIO so the storage layer always
            # receives a byte stream.
            f = BytesIO(data.content)
        else:
            # Open local files in binary mode for the same reason.
            f = open(file_['url'], 'rb')

        existing_record.files[file_['name']] = f
        existing_record.files[file_['name']]['filetype'] = file_['filetype']

    obj.save()
    existing_record.commit()
    db.session.commit()
Example #36
0
def put_file_into_bucket(bucket_id, key, stream, content_length):
    """Store *stream* as a new object version *key* in bucket *bucket_id*.

    :param bucket_id: Id of an existing bucket (404s when missing).
    :param key: Object key to create.
    :param stream: File-like object with the content to store.
    :param content_length: Declared content length; may be ``None``.
    :returns: The created ``ObjectVersion``.
    :raises FileSizeError: When the declared size exceeds the bucket limit.
    """
    # TODO: refactor invenio_files_rest to have a proper API and use that one here
    from invenio_db import db
    from invenio_files_rest.models import Bucket, ObjectVersion
    from invenio_files_rest.views import need_bucket_permission
    from invenio_files_rest.errors import FileSizeError

    bucket = Bucket.get(bucket_id)
    if bucket is None:
        abort(404, 'Bucket does not exist.')

    # WARNING: this function should be isomorphic with
    #          invenio_files_rest.views:ObjectResource.create_object
    @need_bucket_permission('bucket-update')
    def create_object(bucket, key):
        size_limit = bucket.size_limit
        # BUGFIX: content_length may be None; coerce it the same way the
        # sibling implementation of this helper does, instead of raising a
        # TypeError on the comparison.
        if size_limit and int(content_length or 0) > size_limit:
            desc = 'File size limit exceeded.' \
                if isinstance(size_limit, int) else size_limit.reason
            raise FileSizeError(description=desc)

        with db.session.begin_nested():
            obj = ObjectVersion.create(bucket, key)
            obj.set_contents(
                stream, size=content_length, size_limit=size_limit)
        db.session.commit()
        return obj

    return create_object(key=key, bucket=bucket)
Example #37
0
def test_object_restore(app, db, dummy_location):
    """Restore object."""
    file_a = FileInstance(uri="f1", size=1, checksum="mychecksum")
    file_b = FileInstance(uri="f2", size=2, checksum="mychecksum2")
    db.session.add_all([file_a, file_b])
    bucket = Bucket.create()

    # Two real versions of the same key, topped by a delete marker.
    first = ObjectVersion.create(bucket, "test").set_file(file_a)
    ObjectVersion.create(bucket, "test").set_file(file_b)
    marker = ObjectVersion.delete(bucket, "test")
    db.session.commit()

    assert ObjectVersion.query.count() == 3
    # Restoring the delete marker itself is not allowed.
    pytest.raises(InvalidOperationError, marker.restore)

    # Restoring the first version creates a fresh head version that
    # shares the original file instance.
    restored = first.restore()
    db.session.commit()

    assert ObjectVersion.query.count() == 4
    assert restored.is_head is True
    assert restored.version_id != first.version_id
    assert restored.key == first.key
    assert restored.file_id == first.file_id
    assert restored.bucket == first.bucket
Example #38
0
def test_object_delete(app, db, dummy_location):
    """Test object creation."""
    # Build three versions for one key; the newest is a delete marker.
    with db.session.begin_nested():
        bucket = Bucket.create()
        for uri in ("b1test1", "b1test2"):
            ObjectVersion.create(bucket, "test").set_location(
                uri, 1, "achecksum")
        marker = ObjectVersion.delete(bucket, "test")

    assert ObjectVersion.query.count() == 3
    # A deleted key is invisible to plain lookups and to bucket listings.
    assert ObjectVersion.get(bucket, "test") is None
    assert ObjectVersion.get_by_bucket(bucket).count() == 0

    # ...but the marker itself can still be fetched by its version id.
    deleted = ObjectVersion.get(bucket, "test", version_id=marker.version_id)
    assert deleted.is_deleted
    assert deleted.file_id is None

    # Re-creating the key makes it visible again.
    ObjectVersion.create(bucket, "test").set_location(
        "b1test4", 1, "achecksum")

    assert ObjectVersion.query.count() == 4
    assert ObjectVersion.get(bucket.id, "test") is not None
    assert ObjectVersion.get_by_bucket(bucket.id).count() == 1
Example #39
0
def test_object_relink_all(app, db, dummy_location):
    """Test relinking files."""
    bucket = Bucket.create()
    target = ObjectVersion.create(
        bucket, "relink-test", stream=BytesIO(b('relinkthis')))
    ObjectVersion.create(bucket, "do-not-touch", stream=BytesIO(b('na')))
    bucket.snapshot()
    db.session.commit()

    # Snapshot doubled the versions while both buckets share two files.
    assert ObjectVersion.query.count() == 4
    assert FileInstance.query.count() == 2

    # Copy the target's contents into a brand-new file instance.
    replacement = FileInstance.create()
    replacement.copy_contents(
        target.file, default_location=bucket.location.uri)
    db.session.commit()

    original_file = target.file

    assert ObjectVersion.query.filter_by(
        file_id=original_file.id).count() == 2
    assert ObjectVersion.query.filter_by(file_id=replacement.id).count() == 0

    # All versions pointing at the old file switch to the replacement.
    ObjectVersion.relink_all(target.file, replacement)
    db.session.commit()

    assert ObjectVersion.query.filter_by(
        file_id=original_file.id).count() == 0
    assert ObjectVersion.query.filter_by(file_id=replacement.id).count() == 2
Example #40
0
def test_object_restore(app, db, dummy_location):
    """Restore object."""
    # Two file instances backing two successive versions of the same key.
    f1 = FileInstance(uri="f1", size=1, checksum="mychecksum")
    f2 = FileInstance(uri="f2", size=2, checksum="mychecksum2")
    db.session.add(f1)
    db.session.add(f2)
    b1 = Bucket.create()

    obj1 = ObjectVersion.create(b1, "test").set_file(f1)
    ObjectVersion.create(b1, "test").set_file(f2)
    # Deleting leaves a delete marker as the newest version.
    obj_deleted = ObjectVersion.delete(b1, "test")
    db.session.commit()

    assert ObjectVersion.query.count() == 3
    # Cannot restore a deleted version.
    pytest.raises(InvalidOperationError, obj_deleted.restore)

    # Restore first version
    obj_new = obj1.restore()
    db.session.commit()

    # Restoring creates a new head version that shares the old version's
    # file instance rather than copying it.
    assert ObjectVersion.query.count() == 4
    assert obj_new.is_head is True
    assert obj_new.version_id != obj1.version_id
    assert obj_new.key == obj1.key
    assert obj_new.file_id == obj1.file_id
    assert obj_new.bucket == obj1.bucket
Example #41
0
File: api.py Project: zenodo/zenodo
    def create(cls, data, id_=None):
        """Create a deposit.

        Adds bucket creation immediately on deposit creation.

        :param data: Deposit metadata. ``recid``, ``conceptrecid`` and
            ``_deposit.id`` must already be present (their PIDs are looked
            up below).
        :param id_: Optional UUID to use for the underlying record.
        :returns: The newly created deposit.
        """
        # Every deposit gets its own bucket, limited by the configured
        # quota and per-file size.
        bucket = Bucket.create(
            quota_size=current_app.config['ZENODO_BUCKET_QUOTA_SIZE'],
            max_file_size=current_app.config['ZENODO_MAX_FILE_SIZE'],
        )
        data['_buckets'] = {'deposit': str(bucket.id)}
        deposit = super(ZenodoDeposit, cls).create(data, id_=id_)

        # Associate the bucket with the deposit record.
        RecordsBuckets.create(record=deposit.model, bucket=bucket)

        # Resolve the PIDs minted for this deposit/record pair.
        recid = PersistentIdentifier.get(
            'recid', str(data['recid']))
        conceptrecid = PersistentIdentifier.get(
            'recid', str(data['conceptrecid']))
        depid = PersistentIdentifier.get(
            'depid', str(data['_deposit']['id']))

        # Register the new draft in the concept's versioning chain and
        # link the record PID to its deposit PID.
        PIDVersioning(parent=conceptrecid).insert_draft_child(child=recid)
        RecordDraft.link(recid, depid)

        return deposit
def test_object_relink_all(app, db, dummy_location):
    """Test relinking files."""
    b1 = Bucket.create()
    obj1 = ObjectVersion.create(
        b1, "relink-test", stream=BytesIO(b('relinkthis')))
    ObjectVersion.create(b1, "do-not-touch", stream=BytesIO(b('na')))
    # Snapshot duplicates the object versions (4 in total) while both
    # buckets still share the original two file instances.
    b1.snapshot()
    db.session.commit()

    assert ObjectVersion.query.count() == 4
    assert FileInstance.query.count() == 2

    # Copy the contents into a brand-new file instance.
    fnew = FileInstance.create()
    fnew.copy_contents(obj1.file, location=b1.location)
    db.session.commit()

    fold = obj1.file

    # Before relinking, both versions still point at the old file.
    assert ObjectVersion.query.filter_by(file_id=fold.id).count() == 2
    assert ObjectVersion.query.filter_by(file_id=fnew.id).count() == 0

    # Every version pointing at the old file is switched to the new one.
    ObjectVersion.relink_all(obj1.file, fnew)
    db.session.commit()

    assert ObjectVersion.query.filter_by(file_id=fold.id).count() == 0
    assert ObjectVersion.query.filter_by(file_id=fnew.id).count() == 2
Example #43
0
def datasets(skip_files):
    """Load demo datasets records.

    Reads every ``*.json`` fixture under ``modules/fixtures/data/datasets``,
    mints a PID for each record, creates a bucket per record and registers
    the listed files (unless *skip_files* is true), then indexes the record.
    """
    from invenio_db import db
    from invenio_records_files.api import Record
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.recid import \
        cernopendata_recid_minter
    from cernopendata.modules.records.minters.datasetid import \
        cernopendata_datasetid_minter

    from invenio_files_rest.models import \
        Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/datasets-v1.0.0.json')
    # Named ``data_dir`` so the per-record payload below cannot shadow it.
    data_dir = pkg_resources.resource_filename(
        'cernopendata', 'modules/fixtures/data/datasets')
    datasets_json = glob.glob(os.path.join(data_dir, '*.json'))

    # FIXME: change the treatment of `files` according to `records` fixtures.
    for filename in datasets_json:

        click.echo('Loading datasets from {0} ...'.format(filename))

        with open(filename, 'rb') as source:
            for data in json.load(source):
                files = data.pop('files', [])

                # Distinct name instead of shadowing the ``id`` builtin.
                record_uuid = uuid.uuid4()
                # (TOFIX) Remove if statement in production
                # as every dataset record should have a doi
                if data.get('doi', None):
                    cernopendata_datasetid_minter(record_uuid, data)
                else:
                    cernopendata_recid_minter(record_uuid, data)
                data['$schema'] = schema
                record = Record.create(data, id_=record_uuid)

                bucket = Bucket.create()
                RecordsBuckets.create(record=record.model, bucket=bucket)

                for file_meta in files:
                    if skip_files:
                        break
                    assert 'uri' in file_meta
                    assert 'size' in file_meta
                    assert 'checksum' in file_meta

                    f = FileInstance.create()
                    # Use a distinct name: the original clobbered the outer
                    # ``filename`` loop variable. The object key is the
                    # basename of the URI.
                    object_key = file_meta.get('uri').split('/')[-1]
                    f.set_uri(file_meta.get('uri'), file_meta.get('size'),
                              file_meta.get('checksum'))

                    ObjectVersion.create(bucket, object_key, _file_id=f.id)
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
    def create_bucket(cls, data):
        """Create a bucket for this record.

        Override this method to provide more advanced bucket creation
        capabilities. This method may return a new or existing bucket, or may
        return None, in case no bucket should be created.

        :param data: Record data (unused by this default implementation).
        :returns: A new bucket created with the application's default
            location and storage class.
        """
        return Bucket.create()
Example #45
0
def test_object_get_by_bucket(app, db, dummy_location):
    """Test object listing."""
    bucket_a = Bucket.create()
    bucket_b = Bucket.create()

    # Three successive versions of the same key in the first bucket.
    version1 = ObjectVersion.create(bucket_a, "test")
    version1.set_location("b1test1", 1, "achecksum")
    version2 = ObjectVersion.create(bucket_a, "test")
    version2.set_location("b1test2", 1, "achecksum")
    version3 = ObjectVersion.create(bucket_a, "test")
    version3.set_location("b1test3", 1, "achecksum")
    # A different key in the same bucket, plus the same key elsewhere.
    ObjectVersion.create(bucket_a, "another").set_location(
        "b1another1", 1, "achecksum")
    ObjectVersion.create(bucket_b, "test").set_location(
        "b2test1", 1, "achecksum")
    db.session.commit()

    # Sanity check
    assert ObjectVersion.query.count() == 5
    assert ObjectVersion.get(bucket_a, "test")
    assert ObjectVersion.get(bucket_a, "another")
    assert ObjectVersion.get(bucket_b, "test")

    # Listing returns only heads by default; versions=True returns all.
    assert ObjectVersion.get_by_bucket(bucket_a).count() == 2
    assert ObjectVersion.get_by_bucket(bucket_a, versions=True).count() == 4
    assert ObjectVersion.get_by_bucket(bucket_b).count() == 1
    assert ObjectVersion.get_by_bucket(bucket_b, versions=True).count() == 1

    # Heads come back in alphabetical key order.
    heads = ObjectVersion.get_by_bucket(bucket_a.id).all()
    assert heads[0].key == "another"
    assert heads[1].key == "test"

    # Versions are grouped per key, newest first within each key.
    everything = ObjectVersion.get_by_bucket(
        bucket_a.id, versions=True).all()
    assert everything[0].key == "another"
    assert everything[1].key == "test"
    assert everything[1].version_id == version3.version_id
    assert everything[2].key == "test"
    assert everything[2].version_id == version2.version_id
    assert everything[3].key == "test"
    assert everything[3].version_id == version1.version_id
Example #46
0
def test_object_get_by_bucket(app, db, dummy_location):
    """Test object listing."""
    b1 = Bucket.create()
    b2 = Bucket.create()

    # First version of object
    obj1_first = ObjectVersion.create(b1, "test")
    obj1_first.set_location("b1test1", 1, "achecksum")
    # Intermediate version (note: despite older copies of this comment,
    # this is a regular version with a location, not a delete marker).
    obj1_intermediate = ObjectVersion.create(b1, "test")
    obj1_intermediate.set_location("b1test2", 1, "achecksum")
    # Latest version of object
    obj1_latest = ObjectVersion.create(b1, "test")
    obj1_latest.set_location("b1test3", 1, "achecksum")
    # Create objects in/not in same bucket using different key.
    ObjectVersion.create(b1, "another").set_location(
        "b1another1", 1, "achecksum")
    ObjectVersion.create(b2, "test").set_location("b2test1", 1, "achecksum")
    db.session.commit()

    # Sanity check
    assert ObjectVersion.query.count() == 5
    assert ObjectVersion.get(b1, "test")
    assert ObjectVersion.get(b1, "another")
    assert ObjectVersion.get(b2, "test")

    # Retrieve objects for a bucket with/without versions
    assert ObjectVersion.get_by_bucket(b1).count() == 2
    assert ObjectVersion.get_by_bucket(b1, versions=True).count() == 4
    assert ObjectVersion.get_by_bucket(b2).count() == 1
    assert ObjectVersion.get_by_bucket(b2, versions=True).count() == 1

    # Assert order of returned objects (alphabetical)
    objs = ObjectVersion.get_by_bucket(b1.id).all()
    assert objs[0].key == "another"
    assert objs[1].key == "test"

    # Assert order of returned objects verions (grouped per key,
    # newest version first within each key)
    objs = ObjectVersion.get_by_bucket(b1.id, versions=True).all()
    assert objs[0].key == "another"
    assert objs[1].key == "test"
    assert objs[1].version_id == obj1_latest.version_id
    assert objs[2].key == "test"
    assert objs[2].version_id == obj1_intermediate.version_id
    assert objs[3].key == "test"
    assert objs[3].version_id == obj1_first.version_id
Example #47
0
def data_policies(skip_files):
    """Load demo Data Policy records.

    Reads every ``*.json`` fixture under ``modules/fixtures/data``, mints a
    PID per record, creates a bucket, registers the listed files (unless
    *skip_files* is true) and indexes the record.
    """
    from invenio_db import db
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.recid import \
        cernopendata_recid_minter

    from invenio_files_rest.models import \
        Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets
    from invenio_records_files.api import Record

    from invenio_records.models import RecordMetadata

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/data-policies-v1.0.0.json'
    )
    # Named ``data_dir`` so the per-record payload below cannot shadow it.
    data_dir = pkg_resources.resource_filename('cernopendata',
                                               'modules/fixtures/data')
    data_policies_json = glob.glob(os.path.join(data_dir, '*.json'))

    for filename in data_policies_json:

        click.echo('Loading data-policies from {0} ...'.format(filename))

        with open(filename, 'rb') as source:
            for data in json.load(source):
                files = data.pop('files', [])

                # Distinct name instead of shadowing the ``id`` builtin.
                record_uuid = uuid.uuid4()
                cernopendata_recid_minter(record_uuid, data)
                data['$schema'] = schema
                record = Record.create(data, id_=record_uuid)

                bucket = Bucket.create()
                RecordsBuckets.create(
                    record=record.model, bucket=bucket)

                for file_meta in files:
                    if skip_files:
                        break
                    assert 'uri' in file_meta
                    assert 'size' in file_meta
                    assert 'checksum' in file_meta

                    f = FileInstance.create()
                    # Use a distinct name: the original clobbered the outer
                    # ``filename`` loop variable. The object key is the
                    # basename of the URI.
                    object_key = file_meta.get('uri').split('/')[-1]
                    f.set_uri(file_meta.get('uri'), file_meta.get('size'),
                              file_meta.get('checksum'))
                    ObjectVersion.create(
                        bucket,
                        object_key,
                        _file_id=f.id
                    )
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
Example #48
0
 def delete_buckets(record):
     """Mark every bucket referenced by the record's files as deleted.

     :param record: Record-like mapping whose ``files`` entries may carry a
         ``bucket`` id.
     """
     files = record.get('files', [])
     bucket_ids = {f.get('bucket') for f in files}
     for b_id in bucket_ids:
         # Skip entries without a bucket reference.
         if b_id is None:
             continue
         b = Bucket.get(b_id)
         # BUGFIX: Bucket.get may return None for a stale/unknown id;
         # the original crashed with AttributeError on ``None.deleted``.
         if b is not None:
             b.deleted = True
Example #49
0
def test_object_create(app, db, dummy_location):
    """Test object creation."""
    with db.session.begin_nested():
        b = Bucket.create()
        # Create one object version
        obj1 = ObjectVersion.create(b, "test")
        assert obj1.bucket_id == b.id
        assert obj1.key == 'test'
        assert obj1.version_id
        # No file attached yet, but the new version is already the head.
        assert obj1.file_id is None
        assert obj1.is_head is True
        assert obj1.bucket == b

        # Set fake location.
        obj1.set_location("file:///tmp/obj1", 1, "checksum")

        # Create one object version for same object key
        obj2 = ObjectVersion.create(b, "test")
        assert obj2.bucket_id == b.id
        assert obj2.key == 'test'
        assert obj2.version_id != obj1.version_id
        assert obj2.file_id is None
        assert obj2.is_head is True
        assert obj2.bucket == b

        # Set fake location
        obj2.set_location("file:///tmp/obj2", 2, "checksum")

        # Create a new object version for a different object with no location.
        # I.e. it is considered a delete marker.
        obj3 = ObjectVersion.create(b, "deleted_obj")

    # Object __repr__
    assert str(obj1) == \
        "{0}:{1}:{2}".format(obj1.bucket_id, obj1.version_id, obj1.key)

    # Sanity check
    assert ObjectVersion.query.count() == 3

    # Assert that obj2 is the head version
    obj = ObjectVersion.get(b.id, "test", version_id=obj1.version_id)
    assert obj.version_id == obj1.version_id
    assert obj.is_head is False
    obj = ObjectVersion.get(b.id, "test", version_id=obj2.version_id)
    assert obj.version_id == obj2.version_id
    assert obj.is_head is True
    # Assert that getting latest version gets obj2
    obj = ObjectVersion.get(b.id, "test")
    assert obj.version_id == obj2.version_id
    assert obj.is_head is True

    # Assert that obj3 is not retrievable (without specifying version id).
    assert ObjectVersion.get(b.id, "deleted_obj") is None
    # Assert that obj3 *is* retrievable (when specifying version id).
    assert \
        ObjectVersion.get(b.id, "deleted_obj", version_id=obj3.version_id) == \
        obj3
def test_object_set_file(app, db, dummy_location):
    """Test object set file."""
    bucket = Bucket.create()
    file_instance = FileInstance(uri="f1", size=1, checksum="mychecksum")
    version = ObjectVersion.create(bucket, "test").set_file(file_instance)
    db.session.commit()

    assert version.file == file_instance

    # A version's file may only be assigned once.
    assert pytest.raises(
        FileInstanceAlreadySetError, version.set_file, file_instance)
Example #51
0
def test_bucket_create_object(app, db):
    """Test bucket creation."""
    with db.session.begin_nested():
        loc_plain = Location(name='test1', uri='file:///tmp/1', default=False)
        loc_default = Location(name='test2', uri='file:///tmp/2', default=True)
        db.session.add(loc_plain)
        db.session.add(loc_default)

    assert Location.query.count() == 2

    # A bare create() uses the default location and storage class.
    with db.session.begin_nested():
        bucket = Bucket.create()
        assert bucket.id
        assert bucket.default_location == Location.get_default().id
        assert bucket.location == Location.get_default()
        assert bucket.default_storage_class == \
            app.config['FILES_REST_DEFAULT_STORAGE_CLASS']
        assert bucket.size == 0
        assert bucket.quota_size is None
        assert bucket.max_file_size is None
        assert bucket.deleted is False

    # The repr is simply the bucket id.
    assert str(bucket) == str(bucket.id)

    # A bucket can be fetched back by id.
    assert Bucket.get(bucket.id).id == bucket.id

    # An explicit location object and storage class are honoured...
    with db.session.begin_nested():
        bucket = Bucket.create(location=loc_plain, storage_class='A')
        assert bucket.default_location == Location.get_by_name('test1').id
        assert bucket.default_storage_class == 'A'

        # ...and a location may also be given by name.
        bucket = Bucket.create(location=loc_default.name, storage_class='A')
        assert bucket.default_location == Location.get_by_name('test2').id

    # All three buckets exist.
    assert Bucket.all().count() == 3

    # Unknown storage classes are rejected.
    pytest.raises(ValueError, Bucket.create, storage_class='X')
Example #52
0
def test_object_create_with_fileid(app, db, dummy_location):
    """Test object creation."""
    with db.session.begin_nested():
        bucket = Bucket.create()
        version = ObjectVersion.create(bucket, 'test', stream=BytesIO(b'test'))

    assert bucket.size == 4

    # Re-using the existing file doubles the accounted bucket size
    # without uploading new content.
    ObjectVersion.create(bucket, 'test', _file_id=version.file)
    assert bucket.size == 8
def test_object_snapshot(app, db, dummy_location):
    """Test snapshot creation."""
    b1 = Bucket.create()
    b2 = Bucket.create()
    # Exercise every shape of history: multi-versioned, deleted,
    # deleted-then-recreated, and a plain single version.
    ObjectVersion.create(b1, "versioned").set_location("b1v1", 1, "achecksum")
    ObjectVersion.create(b1, "versioned").set_location("b1v2", 1, "achecksum")
    ObjectVersion.create(b1, "deleted").set_location("b1d1", 1, "achecksum")
    ObjectVersion.delete(b1, "deleted")
    ObjectVersion.create(b1, "undeleted").set_location("b1u1", 1, "achecksum")
    ObjectVersion.delete(b1, "undeleted")
    ObjectVersion.create(b1, "undeleted").set_location("b1u2", 1, "achecksum")
    ObjectVersion.create(b1, "simple").set_location("b1s1", 1, "achecksum")
    ObjectVersion.create(b2, "another").set_location("b2a1", 1, "achecksum")
    db.session.commit()

    assert ObjectVersion.query.count() == 9
    assert FileInstance.query.count() == 7
    assert Bucket.query.count() == 2
    assert ObjectVersion.get_by_bucket(b1).count() == 3
    assert ObjectVersion.get_by_bucket(b2).count() == 1

    # check that for 'undeleted' key there is only one HEAD
    heads = [o for o in ObjectVersion.query.filter_by(
        bucket_id=b1.id, key='undeleted').all() if o.is_head]
    assert len(heads) == 1
    assert heads[0].file.uri == 'b1u2'

    b3 = b1.snapshot(lock=True)
    db.session.commit()

    # Must be locked as requested.
    assert b1.locked is False
    assert b3.locked is True

    # Snapshot copies only the head versions (3 new ObjectVersions) and
    # shares the file instances: no new FileInstance rows appear.
    assert Bucket.query.count() == 3
    assert ObjectVersion.query.count() == 12
    assert FileInstance.query.count() == 7
    assert ObjectVersion.get_by_bucket(b1).count() == 3
    assert ObjectVersion.get_by_bucket(b2).count() == 1
    assert ObjectVersion.get_by_bucket(b3).count() == 3
    assert ObjectVersion.get_by_bucket(b1, versions=True).count() == 8
    assert ObjectVersion.get_by_bucket(b3, versions=True).count() == 3
    def create_files(cls, record, files, existing_files):
        """Create files.

        This method is currently limited to a single bucket per record.

        :param record: Record the files belong to.
        :param files: Mapping of file key to metadata, passed on to
            ``cls.create_file``.
        :param existing_files: Previously recorded file dicts; only used to
            recover an already assigned bucket id.
        :returns: One-element list containing the bucket used.
        """
        default_bucket = None
        # Look for bucket id in existing files.
        for f in existing_files:
            if 'bucket' in f:
                default_bucket = f['bucket']
                break

        # Create a bucket in default location if none is found.
        if default_bucket is None:
            b = Bucket.create()
            BucketTag.create(b, 'record', str(record.id))
            default_bucket = str(b.id)
            db.session.commit()
        else:
            b = Bucket.get(default_bucket)

        # Rebuild the record's file manifest from scratch.
        record['_files'] = []
        for key, meta in files.items():
            obj = cls.create_file(b, key, meta)
            # Lowercased extension without the leading dot becomes the type.
            ext = splitext(obj.key)[1].lower()
            if ext.startswith('.'):
                ext = ext[1:]
            record['_files'].append(dict(
                bucket=str(obj.bucket.id),
                key=obj.key,
                version_id=str(obj.version_id),
                size=obj.file.size,
                checksum=obj.file.checksum,
                type=ext,
            ))
        db.session.add(
            RecordsBuckets(record_id=record.id, bucket_id=b.id)
        )
        record.commit()
        db.session.commit()

        return [b]
Example #55
0
def image_object(database, location, image_path):
    """Get ObjectVersion of test image."""
    bucket = Bucket.create()
    database.session.commit()

    # Stream the image file into a single object version.
    with open(image_path, 'rb') as image_stream:
        version = ObjectVersion.create(
            bucket, 'test.jpg', stream=image_stream, size=getsize(image_path))
    database.session.commit()
    return version
Example #56
0
def test_RecordSIP_create(db, mocker):
    """Test create method from the API class RecordSIP."""
    # we setup a file storage
    tmppath = tempfile.mkdtemp()
    db.session.add(Location(name='default', uri=tmppath, default=True))
    # setup metadata
    mtype = SIPMetadataType(title='JSON Test', name='json-test',
                            format='json', schema='url://to/schema')
    db.session.add(mtype)
    db.session.commit()
    # first we create a record
    recid = uuid.uuid4()
    pid = PersistentIdentifier.create(
        'recid',
        '1337',
        object_type='rec',
        object_uuid=recid,
        status=PIDStatus.REGISTERED)
    # skip schema validation; the test schema URL is not resolvable
    mocker.patch('invenio_records.api.RecordBase.validate',
                 return_value=True, autospec=True)
    record = Record.create(
        {'title': 'record test', '$schema': 'url://to/schema'},
        recid)
    # we add a file to the record
    bucket = Bucket.create()
    content = b'Test file\n'
    RecordsBuckets.create(record=record.model, bucket=bucket)
    record.files['test.txt'] = BytesIO(content)
    db.session.commit()
    # Let's create a SIP
    user = create_test_user('*****@*****.**')
    agent = {'email': '*****@*****.**', 'ip_address': '1.1.1.1'}
    rsip = RecordSIP.create(pid, record, True, user_id=user.id, agent=agent)
    db.session.commit()
    # test! One SIP with one file and one metadata entry was written.
    assert RecordSIP_.query.count() == 1
    assert SIP_.query.count() == 1
    assert SIPFile.query.count() == 1
    assert SIPMetadata.query.count() == 1
    assert len(rsip.sip.files) == 1
    assert len(rsip.sip.metadata) == 1
    metadata = rsip.sip.metadata[0]
    assert metadata.type.format == 'json'
    assert '"title": "record test"' in metadata.content
    assert rsip.sip.archivable is True
    # we try with no files: only a new metadata entry should appear
    rsip = RecordSIP.create(pid, record, True, create_sip_files=False,
                            user_id=user.id, agent=agent)
    assert SIPFile.query.count() == 1
    assert SIPMetadata.query.count() == 2
    assert len(rsip.sip.files) == 0
    assert len(rsip.sip.metadata) == 1
    # finalization: remove the temporary storage
    rmtree(tmppath)
Example #57
0
def test_object_set_location(app, db, dummy_location):
    """Test object set contents."""
    with db.session.begin_nested():
        bucket = Bucket.create()
        version = ObjectVersion.create(bucket, "LICENSE")
        assert version.file_id is None
        assert FileInstance.query.count() == 0
        # Setting a location creates the backing file instance...
        version.set_location("b1test1", 1, "achecksum")
        assert FileInstance.query.count() == 1
        # ...and may only be done once per version.
        pytest.raises(
            FileInstanceAlreadySetError,
            version.set_location, "b1test1", 1, "achecksum")
Example #58
0
def test_object_multibucket(app, db, dummy_location):
    """Test object creation in multiple buckets."""
    with db.session.begin_nested():
        # The same key in two different buckets must stay independent.
        first_bucket = Bucket.create()
        second_bucket = Bucket.create()
        first_obj = ObjectVersion.create(first_bucket, "test")
        first_obj.set_location("file:///tmp/obj1", 1, "checksum")
        second_obj = ObjectVersion.create(second_bucket, "test")
        second_obj.set_location("file:///tmp/obj2", 2, "checksum")

    # Sanity check
    assert ObjectVersion.query.count() == 2

    # Each bucket resolves the key to its own head version.
    head = ObjectVersion.get(first_bucket.id, "test")
    assert head.is_head is True
    assert head.version_id == first_obj.version_id
    head = ObjectVersion.get(second_bucket.id, "test")
    assert head.is_head is True
    assert head.version_id == second_obj.version_id
Example #59
0
def test_verify_checksum(app, db, dummy_location):
    """Test celery tasks for checksum verification."""
    bucket = Bucket.create()
    with open('README.rst', 'rb') as stream:
        version = ObjectVersion.create(bucket, 'README.rst', stream=stream)
    db.session.commit()

    # Run the verification task for the stored file.
    verify_checksum(str(version.file_id))

    # The file instance records a successful, timestamped check.
    checked = FileInstance.query.get(version.file_id)
    assert checked.last_check_at
    assert checked.last_check is True