Exemple #1
0
def add_file(recid, fp, replace_existing):
    """Add a new file to a published record.

    :param recid: Identifier of the record to attach the file to.
    :param fp: Open file object to upload; its basename becomes the key.
    :param replace_existing: When True, an existing object with the same
        key is deleted before the new one is created.
    """
    pid, record = record_resolver.resolve(recid)
    bucket = record.files.bucket
    key = os.path.basename(fp.name)

    obj = ObjectVersion.get(bucket, key)
    if obj is not None and not replace_existing:
        # Dropped the unused `recid=recid` format argument — the message
        # only interpolates `key`.
        click.echo(
            click.style(
                u'File with key "{key}" already exists.'
                u' Use `--replace-existing/-f` to overwrite it.'.format(
                    key=key),
                fg='red'))
        return

    # Measure the stream, then rewind so the upload reads from the start.
    # (Previously SEEK_SET was passed as the *offset* argument, which only
    # worked because SEEK_SET == 0.)
    fp.seek(0, SEEK_END)
    size = fp.tell()
    fp.seek(0)

    click.echo(u'Will add the following file:\n')
    click.echo(
        click.style(u'  key: "{key}"\n'
                    u'  bucket: {bucket}\n'
                    u'  size: {size}\n'
                    u''.format(key=key, bucket=bucket.id, size=size),
                    fg='green'))
    click.echo(u'to record:\n')
    click.echo(
        click.style(u'  Title: "{title}"\n'
                    u'  RECID: {recid}\n'
                    u'  UUID: {uuid}\n'
                    u''.format(recid=record['recid'],
                               title=record['title'],
                               uuid=record.id),
                    fg='green'))
    if replace_existing and obj is not None:
        click.echo(u'and remove the file:\n')
        click.echo(
            click.style(u'  key: "{key}"\n'
                        u'  bucket: {bucket}\n'
                        u'  size: {size}\n'
                        u''.format(key=obj.key,
                                   bucket=obj.bucket,
                                   size=obj.file.size),
                        fg='green'))

    if click.confirm(u'Continue?'):
        # Published-record buckets are locked; unlock only for the write.
        bucket.locked = False
        if obj is not None and replace_existing:
            ObjectVersion.delete(bucket, obj.key)
        ObjectVersion.create(bucket, key, stream=fp, size=size)
        bucket.locked = True

        record.files.flush()
        record.commit()
        db.session.commit()
        click.echo(click.style(u'File added successfully.', fg='green'))
    else:
        click.echo(click.style(u'File addition aborted.', fg='green'))
Exemple #2
0
def _create_record_from_filepath(path, rec_uuid, indexer, versions, verbose):
    """Create a record from a JSON file plus ``2 * versions`` new versions.

    Returns the first published ``(record, deposit)`` pair.
    """
    with open(path) as record_file:
        record_str = record_file.read()
    record_str = resolve_block_schema_id(resolve_community_id(record_str))

    json_data = json.loads(record_str)
    b2share_deposit_uuid_minter(rec_uuid, data=json_data)
    deposit = Deposit.create(json_data, id_=rec_uuid)
    ObjectVersion.create(deposit.files.bucket, 'myfile',
                         stream=BytesIO(b'mycontent'))
    deposit.publish()
    pid, record = deposit.fetch_published()
    indexer.index(record)
    if verbose > 0:
        click.secho('created new record: {}'.format(str(rec_uuid)))

    last_id = pid.pid_value
    for version_idx in range(2 * versions):
        version_uuid = uuid4()
        version_data = json.loads(record_str)
        b2share_deposit_uuid_minter(version_uuid, data=version_data)
        version_deposit = Deposit.create(version_data, id_=version_uuid,
                                         version_of=last_id)
        ObjectVersion.create(version_deposit.files.bucket,
                             'myfile-ver{}'.format(version_idx),
                             stream=BytesIO(b'mycontent'))
        version_deposit.publish()
        pid, version_record = version_deposit.fetch_published()
        indexer.index(version_record)
        last_id = pid.pid_value
        if verbose > 0:
            click.secho('created new version: {}'.format(str(version_uuid)))

    return record, deposit
def test_object_restore(app, db, dummy_location):
    """Restore object."""
    first_file = FileInstance(uri="f1", size=1, checksum="mychecksum")
    second_file = FileInstance(uri="f2", size=2, checksum="mychecksum2")
    db.session.add(first_file)
    db.session.add(second_file)
    bucket = Bucket.create()

    first_version = ObjectVersion.create(bucket, "test").set_file(first_file)
    ObjectVersion.create(bucket, "test").set_file(second_file)
    delete_marker = ObjectVersion.delete(bucket, "test")
    db.session.commit()

    assert 3 == ObjectVersion.query.count()
    # The delete marker itself must refuse to be restored.
    pytest.raises(InvalidOperationError, delete_marker.restore)

    # Restoring the first version creates a brand-new head version that
    # points at the same file instance.
    restored = first_version.restore()
    db.session.commit()

    assert 4 == ObjectVersion.query.count()
    assert restored.is_head is True
    assert restored.version_id != first_version.version_id
    assert restored.key == first_version.key
    assert restored.file_id == first_version.file_id
    assert restored.bucket == first_version.bucket
def test_object_restore(app, db, dummy_location):
    """Restore object."""
    file_a = FileInstance(uri="f1", size=1, checksum="mychecksum")
    file_b = FileInstance(uri="f2", size=2, checksum="mychecksum2")
    for instance in (file_a, file_b):
        db.session.add(instance)
    bucket = Bucket.create()

    version_one = ObjectVersion.create(bucket, "test").set_file(file_a)
    ObjectVersion.create(bucket, "test").set_file(file_b)
    marker = ObjectVersion.delete(bucket, "test")
    db.session.commit()

    assert ObjectVersion.query.count() == 3
    # A delete marker cannot itself be restored.
    pytest.raises(InvalidOperationError, marker.restore)

    # Restoring version one yields a fresh head sharing the same key/file.
    head = version_one.restore()
    db.session.commit()

    assert ObjectVersion.query.count() == 4
    assert head.is_head is True
    assert head.version_id != version_one.version_id
    assert head.key == version_one.key
    assert head.file_id == version_one.file_id
    assert head.bucket == version_one.bucket
def test_object_relink_all(app, db, dummy_location):
    """Test relinking files."""
    bucket = Bucket.create()
    linked_obj = ObjectVersion.create(
        bucket, "relink-test", stream=BytesIO(b('relinkthis')))
    ObjectVersion.create(bucket, "do-not-touch", stream=BytesIO(b('na')))
    bucket.snapshot()
    db.session.commit()

    assert 4 == ObjectVersion.query.count()
    assert 2 == FileInstance.query.count()

    replacement = FileInstance.create()
    replacement.copy_contents(
        linked_obj.file, default_location=bucket.location.uri)
    db.session.commit()

    original_file = linked_obj.file

    def versions_pointing_at(file_instance):
        # Count object versions referencing the given file instance.
        return ObjectVersion.query.filter_by(
            file_id=file_instance.id).count()

    assert versions_pointing_at(original_file) == 2
    assert versions_pointing_at(replacement) == 0

    ObjectVersion.relink_all(linked_obj.file, replacement)
    db.session.commit()

    assert versions_pointing_at(original_file) == 0
    assert versions_pointing_at(replacement) == 2
def test_b2share_storage_with_pid(base_app, app, tmp_location, login_user,
                                  test_users):
    """Check that the storage class will redirect pid files."""
    pid = 'http://hdl.handle.net/11304/74c66f0b-f814-4202-9dcb-4889ba9b1047'
    with app.app_context():
        # Disable access control for this test
        tmp_location = Location.query.first()
        with db.session.begin_nested():
            bucket = Bucket.create(tmp_location, storage_class='B')
            pid_file = FileInstance.create()
            pid_file.set_uri(pid, 1, 0, storage_class='B')
            ObjectVersion.create(bucket, 'test.txt', pid_file.id)

        db.session.commit()
        url = url_for('invenio_files_rest.object_api',
                      bucket_id=bucket.id,
                      key='test.txt')
        # Capture the original permission factory *before* entering the
        # try block: if it were captured inside and anything failed first,
        # the finally clause would raise NameError on `permission` and
        # mask the real error.
        permission = current_files_rest.permission_factory
    try:
        with app.app_context():
            current_files_rest.permission_factory = allow_all
        # Check that accessing the file redirects to the PID
        with app.test_client() as client:
            resp = client.get(url)
            assert resp.headers['Location'] == pid
            assert resp.status_code == 302
    finally:
        with app.app_context():
            current_files_rest.permission_factory = permission
Exemple #7
0
def test_video_dumps(db, api_project, video):
    """Test video dump, in particular file dump."""
    (project, video_1, video_2) = api_project
    bucket_id = video_1['_buckets']['deposit']
    # Open the source video inside context managers so the file handles
    # are closed after ObjectVersion.create has consumed the streams
    # (the original handles were leaked).
    with open(video, 'rb') as master_stream:
        obj = ObjectVersion.create(bucket=bucket_id,
                                   key='master.mp4',
                                   stream=master_stream)
    with open(video, 'rb') as subformat_stream:
        slave_1 = ObjectVersion.create(bucket=bucket_id,
                                       key='slave_1.mp4',
                                       stream=subformat_stream)
    ObjectVersionTag.create(slave_1, 'master', str(obj.version_id))
    ObjectVersionTag.create(slave_1, 'media_type', 'video')
    ObjectVersionTag.create(slave_1, 'context_type', 'subformat')

    # Create the frames in reverse order; the dump is expected to return
    # them sorted by key regardless of insertion order.
    for i in reversed(range(10)):
        slave = ObjectVersion.create(bucket=bucket_id,
                                     key='frame-{0}.jpeg'.format(i),
                                     stream=BytesIO(b'\x00' * 1024))
        ObjectVersionTag.create(slave, 'master', str(obj.version_id))
        ObjectVersionTag.create(slave, 'media_type', 'image')
        ObjectVersionTag.create(slave, 'context_type', 'frame')

    db.session.commit()

    files = video_1.files.dumps()

    assert len(files) == 1
    files = files[0]  # only one master file

    assert 'frame' in files
    assert [f['key'] for f in files['frame']
            ] == ['frame-{}.jpeg'.format(i) for i in range(10)]
    assert 'subformat' in files
    assert len(files['subformat']) == 1
Exemple #8
0
    def publish(self):
        """Publish GitHub release as record.

        Creates a deposit from the release metadata, downloads and attaches
        every release file from GitHub, publishes the deposit and schedules
        DOI registration.  On any failure the DB transaction is rolled back
        and the deposit is removed from the search index before re-raising.
        """
        id_ = uuid.uuid4()
        # Keep a reference outside the try so the except clause can clean up.
        deposit = None
        try:
            db.session.begin_nested()
            deposit = self.deposit_class.create(self.metadata, id_=id_)
            deposit['_deposit']['created_by'] = self.event.user_id
            deposit['_deposit']['owners'] = [self.event.user_id]

            # Fetch the deposit files
            for key, url in self.files:
                # Make a HEAD request to get GitHub to compute the
                # Content-Length.
                res = self.gh.api.session.head(url, allow_redirects=True)
                # Now, download the file
                res = self.gh.api.session.get(url, stream=True)
                if res.status_code != 200:
                    raise Exception(
                        "Could not retrieve archive from GitHub: {url}".format(
                            url=url))

                # Content-Length may still be absent; `size or None` stores
                # the object with an unknown size instead of size 0.
                size = int(res.headers.get('Content-Length', 0))
                ObjectVersion.create(
                    bucket=deposit.files.bucket,
                    key=key,
                    stream=res.raw,
                    size=size or None,
                    mimetype=res.headers.get('Content-Type'),
                )

            # GitHub-specific SIP store agent
            sip_agent = {
                '$schema':
                current_jsonschemas.path_to_url(
                    current_app.config['SIPSTORE_GITHUB_AGENT_JSONSCHEMA']),
                'user_id':
                self.event.user_id,
                'github_id':
                self.release['author']['id'],
                'email':
                self.gh.account.user.email,
            }
            deposit.publish(user_id=self.event.user_id, sip_agent=sip_agent)
            self.model.recordmetadata = deposit.model
            db.session.commit()

            # Send Datacite DOI registration task
            recid_pid, record = deposit.fetch_published()
            datacite_register.delay(recid_pid.pid_value, str(record.id))
        except Exception:
            db.session.rollback()
            # Remove deposit from index since it was not commited.
            if deposit and deposit.id:
                try:
                    RecordIndexer().delete(deposit)
                except Exception:
                    # Best effort only: indexing cleanup must not mask the
                    # original failure being re-raised below.
                    current_app.logger.exception(
                        "Failed to remove uncommited deposit from index.")
            raise
Exemple #9
0
def create_file_in_bucket(bucket_id):
    """
    Route to upload file or files to provided bucket

    Args:
        bucket_id (str): Bucket Identifier

    Returns:
        flask.Response with content serialization as JSON
    """
    # `request.files.keys()` returns a view, not an iterator; calling
    # next() on it directly raises TypeError, so wrap it in iter().
    files_key = next(iter(request.files.keys()))

    # Resolve the target bucket once instead of re-querying it for every
    # uploaded file.
    bucket = db.session.query(Bucket).filter(
        Bucket.id == bucket_id).first()
    assert bucket is not None

    for file_storage in request.files.getlist(files_key):
        with db.session.begin_nested():
            ObjectVersion.create(bucket,
                                 file_storage.filename,
                                 stream=file_storage.stream)
    db.session.commit()

    return jsonify({})
def test_b2share_storage_with_pid(base_app, app, tmp_location, login_user, test_users):
    """Check that the storage class will redirect pid files."""
    pid = 'http://hdl.handle.net/11304/74c66f0b-f814-4202-9dcb-4889ba9b1047'
    with app.app_context():
        # Disable access control for this test
        tmp_location = Location.query.first()
        with db.session.begin_nested():
            bucket = Bucket.create(tmp_location, storage_class='B')
            pid_file = FileInstance.create()
            pid_file.set_uri(pid, 1, 0, storage_class='B')
            ObjectVersion.create(bucket, 'test.txt', pid_file.id)

        db.session.commit()
        url = url_for('invenio_files_rest.object_api',
                        bucket_id=bucket.id,
                        key='test.txt')
        # Capture the original permission factory *before* the try block:
        # if it were captured inside and an earlier statement failed, the
        # finally clause would raise NameError on `permission` and mask
        # the real error.
        permission = current_files_rest.permission_factory
    try:
        with app.app_context():
            current_files_rest.permission_factory = allow_all
        # Check that accessing the file redirects to the PID
        with app.test_client() as client:
            resp = client.get(url)
            assert resp.headers['Location'] == pid
            assert resp.status_code == 302
    finally:
        with app.app_context():
            current_files_rest.permission_factory = permission
Exemple #11
0
def test_record_publish_adds_no_handles_for_external_files(app,
                            records_data_with_external_pids,
                            test_records_data):
    """Test that no handle PIDs are created for external files."""
    for metadata in test_records_data:
        with app.app_context():
            app.config.update({'FAKE_EPIC_PID': True})

            external_pids = records_data_with_external_pids['external_pids']
            pid_by_key = {entry['key']: entry['ePIC_PID']
                          for entry in external_pids}
            data = deepcopy(metadata)
            data['external_pids'] = deepcopy(external_pids)

            record_uuid = uuid.uuid4()
            b2share_deposit_uuid_minter(record_uuid, data=data)

            deposit = Deposit.create(data, id_=record_uuid)
            for local_key in ('real_file_1.txt', 'real_file_2.txt'):
                ObjectVersion.create(deposit.files.bucket, local_key,
                                     stream=BytesIO(b'mycontent'))
            deposit.submit()
            deposit.publish()
            deposit.commit()

            _, record = deposit.fetch_published()

            # External files keep their pre-existing handle (stored under
            # record['_deposit']['external_pids']); only locally uploaded
            # files receive a freshly minted fake PID containing '0000'.
            for record_file in record.files:
                if record_file['key'] in pid_by_key:
                    assert record_file.get('ePIC_PID') is None
                else:
                    assert '0000' in record_file['ePIC_PID']
def persist_file_content(record: CernSearchRecord, file_content: str, filename: str):
    """Persist file's extracted content in bucket on filesystem and database.

    :param record: Record whose ``files_content`` bucket receives the content.
    :param file_content: Extracted text to store (UTF-8 encoded on write).
    :param filename: Object key under which the content is stored.
    """
    # Log the actual filename — the previous message logged a literal
    # "(unknown)" placeholder instead of interpolating it.
    current_app.logger.debug(f"Persist file: {filename} in record {record.id}")

    bucket_content = record.files_content.bucket
    ObjectVersion.create(bucket_content, filename, stream=BytesIO(file_content.encode()))
    db.session.commit()
Exemple #13
0
def save_and_validate_logo(logo_stream, logo_filename, community_id):
    """Validate if communities logo is in limit size and save it.

    Returns the stored extension on success, or ``None`` when the logo is
    too large or has a disallowed extension.
    """
    cfg = current_app.config

    logos_bucket = Bucket.query.get(cfg['COMMUNITIES_BUCKET_UUID'])
    max_size = cfg['COMMUNITIES_LOGO_MAX_SIZE']
    extension = os.path.splitext(logo_filename)[1]
    if extension.startswith('.'):
        extension = extension[1:]

    # Measure the stream by seeking to its end.
    logo_stream.seek(0, SEEK_END)
    logo_size = logo_stream.tell()
    if logo_size > max_size:
        return None

    if extension not in cfg['COMMUNITIES_LOGO_EXTENSIONS']:
        return None

    key = "{0}/logo.{1}".format(community_id, extension)
    logo_stream.seek(0)  # Rewind the stream to the beginning
    ObjectVersion.create(logos_bucket,
                         key,
                         stream=logo_stream,
                         size=logo_size)
    return extension
Exemple #14
0
def _create_record_from_filepath(path, rec_uuid, indexer, versions, verbose):
    """Create a record from a JSON file plus ``2 * versions`` new versions.

    Returns the first published ``(record, deposit)`` pair.
    """
    with open(path) as record_file:
        raw = record_file.read()
    raw = resolve_community_id(raw)
    raw = resolve_block_schema_id(raw)

    def _mint_and_publish(uuid_, key, **deposit_kwargs):
        # Mint a deposit PID, create and publish the deposit with a single
        # file under `key`, index the published record and return the
        # (pid, record, deposit) triple.
        payload = json.loads(raw)
        b2share_deposit_uuid_minter(uuid_, data=payload)
        dep = Deposit.create(payload, id_=uuid_, **deposit_kwargs)
        ObjectVersion.create(dep.files.bucket, key,
                             stream=BytesIO(b'mycontent'))
        dep.publish()
        published_pid, published_record = dep.fetch_published()
        indexer.index(published_record)
        return published_pid, published_record, dep

    pid, record, deposit = _mint_and_publish(rec_uuid, 'myfile')
    if verbose > 0:
        click.secho('created new record: {}'.format(str(rec_uuid)))

    last_id = pid.pid_value
    for i in range(2 * versions):
        rec_uuid = uuid4()
        pid, _, _ = _mint_and_publish(rec_uuid, 'myfile-ver{}'.format(i),
                                      version_of=last_id)
        last_id = pid.pid_value
        if verbose > 0:
            click.secho('created new version: {}'.format(str(rec_uuid)))

    return record, deposit
Exemple #15
0
def rename_file(recid, key, new_key):
    """Rename a file of a published record.

    Looks up the object stored under ``key`` in the record's bucket and,
    after interactive confirmation, re-creates it under ``new_key``
    pointing at the same underlying file instance.

    :param recid: Identifier of the record owning the file.
    :param key: Current object key.
    :param new_key: New object key; must not already exist in the bucket.
    """
    pid, record = record_resolver.resolve(recid)
    bucket = record.files.bucket

    obj = ObjectVersion.get(bucket, key)
    if obj is None:
        click.echo(click.style(u'File with key "{key}" not found.'.format(
            key=key), fg='red'))
        return

    # Refuse to clobber an existing object under the target key.
    new_obj = ObjectVersion.get(bucket, new_key)
    if new_obj is not None:
        click.echo(click.style(u'File with key "{key}" already exists.'.format(
            key=new_key), fg='red'))
        return

    if click.confirm(u'Rename "{key}" to "{new_key}" on bucket {bucket}.'
                     u' Continue?'.format(
                        key=obj.key, new_key=new_key, bucket=bucket.id)):
        # Published-record buckets are locked; unlock only for the rename.
        record.files.bucket.locked = False

        # The stored file is kept: delete the old object version and create
        # a new one that references the same file instance.
        file_id = obj.file.id
        ObjectVersion.delete(bucket, obj.key)
        ObjectVersion.create(bucket, new_key, _file_id=file_id)
        record.files.bucket.locked = True
        record.files.flush()
        record.commit()
        db.session.commit()
        click.echo(click.style(u'File renamed successfully.', fg='green'))
    else:
        click.echo(click.style(u'Aborted file rename.', fg='green'))
def test_object_relink_all(app, db, dummy_location):
    """Test relinking files."""
    bucket = Bucket.create()
    target = ObjectVersion.create(
        bucket, "relink-test", stream=BytesIO(b('relinkthis')))
    ObjectVersion.create(bucket, "do-not-touch", stream=BytesIO(b('na')))
    bucket.snapshot()
    db.session.commit()

    assert ObjectVersion.query.count() == 4
    assert FileInstance.query.count() == 2

    new_file = FileInstance.create()
    new_file.copy_contents(target.file, location=bucket.location)
    db.session.commit()

    old_file = target.file

    # Before relinking, both versions of "relink-test" point at the old
    # file instance and none at the new one.
    assert 2 == ObjectVersion.query.filter_by(file_id=old_file.id).count()
    assert 0 == ObjectVersion.query.filter_by(file_id=new_file.id).count()

    ObjectVersion.relink_all(target.file, new_file)
    db.session.commit()

    # After relinking, every reference has moved to the new file instance.
    assert 0 == ObjectVersion.query.filter_by(file_id=old_file.id).count()
    assert 2 == ObjectVersion.query.filter_by(file_id=new_file.id).count()
Exemple #17
0
def datasets(skip_files):
    """Load demo datasets records.

    :param skip_files: When True, record files are not attached.
    """
    from invenio_db import db
    from invenio_records_files.api import Record
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.recid import \
        cernopendata_recid_minter
    from cernopendata.modules.records.minters.datasetid import \
        cernopendata_datasetid_minter

    from invenio_files_rest.models import \
        Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/datasets-v1.0.0.json')
    # Use a dedicated name for the fixtures directory instead of reusing
    # `data`, which below also holds each record's payload.
    data_dir = pkg_resources.resource_filename(
        'cernopendata', 'modules/fixtures/data/datasets')
    datasets_json = glob.glob(os.path.join(data_dir, '*.json'))

    # FIXME: change the treatment of `files` according to `records` fixtures.
    for filename in datasets_json:

        click.echo('Loading datasets from {0} ...'.format(filename))

        with open(filename, 'rb') as source:
            for data in json.load(source):
                files = data.pop('files', [])

                # `rec_uuid` instead of shadowing the builtin `id`.
                rec_uuid = uuid.uuid4()
                # (TOFIX) Remove if statement in production
                # as every dataset record should have a doi
                if data.get('doi', None):
                    cernopendata_datasetid_minter(rec_uuid, data)
                else:
                    cernopendata_recid_minter(rec_uuid, data)
                data['$schema'] = schema
                record = Record.create(data, id_=rec_uuid)

                bucket = Bucket.create()
                RecordsBuckets.create(record=record.model, bucket=bucket)

                if not skip_files:
                    for file_meta in files:
                        assert 'uri' in file_meta
                        assert 'size' in file_meta
                        assert 'checksum' in file_meta

                        f = FileInstance.create()
                        # Derive the object key from the URI's last path
                        # segment without clobbering the outer `filename`
                        # loop variable, and avoid shadowing `file`.
                        object_key = file_meta['uri'].split('/')[-1]
                        f.set_uri(file_meta['uri'], file_meta['size'],
                                  file_meta['checksum'])

                        ObjectVersion.create(bucket, object_key,
                                             _file_id=f.id)
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
Exemple #18
0
def data_policies(skip_files):
    """Load demo Data Policy records.

    :param skip_files: When True, record files are not attached.
    """
    from invenio_db import db
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.recid import \
        cernopendata_recid_minter

    from invenio_files_rest.models import \
        Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets
    from invenio_records_files.api import Record

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/data-policies-v1.0.0.json'
    )
    # Dedicated name for the fixtures directory instead of reusing `data`,
    # which below also holds each record payload.
    data_dir = pkg_resources.resource_filename('cernopendata',
                                               'modules/fixtures/data')
    data_policies_json = glob.glob(os.path.join(data_dir, '*.json'))

    for filename in data_policies_json:

        click.echo('Loading data-policies from {0} ...'.format(filename))

        with open(filename, 'rb') as source:
            for data in json.load(source):
                files = data.pop('files', [])

                # `rec_uuid` instead of shadowing the builtin `id`.
                rec_uuid = uuid.uuid4()
                cernopendata_recid_minter(rec_uuid, data)
                data['$schema'] = schema
                record = Record.create(data, id_=rec_uuid)

                bucket = Bucket.create()
                RecordsBuckets.create(
                    record=record.model, bucket=bucket)

                if not skip_files:
                    for file_meta in files:
                        assert 'uri' in file_meta
                        assert 'size' in file_meta
                        assert 'checksum' in file_meta

                        f = FileInstance.create()
                        # Key from the URI's last path segment, without
                        # clobbering the outer `filename` loop variable.
                        object_key = file_meta['uri'].split('/')[-1]
                        f.set_uri(file_meta['uri'], file_meta['size'],
                                  file_meta['checksum'])
                        ObjectVersion.create(
                            bucket,
                            object_key,
                            _file_id=f.id
                        )
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
Exemple #19
0
def data_policies(skip_files):
    """Load demo Data Policy records.

    :param skip_files: When True, record files are not attached.
    """
    from invenio_db import db
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.recid import \
        cernopendata_recid_minter

    from invenio_files_rest.models import \
        Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets
    from invenio_records_files.api import Record

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/data-policies-v1.0.0.json'
    )
    # Dedicated name for the fixtures directory instead of reusing `data`,
    # which below also holds each record payload.
    data_dir = pkg_resources.resource_filename('cernopendata',
                                               'modules/fixtures/data')
    data_policies_json = glob.glob(os.path.join(data_dir, '*.json'))

    for filename in data_policies_json:

        click.echo('Loading data-policies from {0} ...'.format(filename))

        with open(filename, 'rb') as source:
            for data in json.load(source):
                files = data.pop('files', [])

                # `rec_uuid` instead of shadowing the builtin `id`.
                rec_uuid = uuid.uuid4()
                cernopendata_recid_minter(rec_uuid, data)
                data['$schema'] = schema
                record = Record.create(data, id_=rec_uuid)

                bucket = Bucket.create()
                RecordsBuckets.create(
                    record=record.model, bucket=bucket)

                if not skip_files:
                    for file_meta in files:
                        assert 'uri' in file_meta
                        assert 'size' in file_meta
                        assert 'checksum' in file_meta

                        f = FileInstance.create()
                        # Key from the URI's last path segment, without
                        # clobbering the outer `filename` loop variable.
                        object_key = file_meta['uri'].split('/')[-1]
                        f.set_uri(file_meta['uri'], file_meta['size'],
                                  file_meta['checksum'])
                        ObjectVersion.create(
                            bucket,
                            object_key,
                            _file_id=f.id
                        )
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
def test_object_create(app, db, dummy_location):
    """Test object creation."""
    with db.session.begin_nested():
        bucket = Bucket.create()

        # First version of key "test".
        first = ObjectVersion.create(bucket, "test")
        assert first.bucket_id == bucket.id
        assert first.key == 'test'
        assert first.version_id
        assert first.file_id is None
        assert first.is_head is True
        assert first.bucket == bucket

        # Set fake location.
        first.set_location("file:///tmp/obj1", 1, "checksum")

        # A second version of the same key becomes the new head.
        second = ObjectVersion.create(bucket, "test")
        assert second.bucket_id == bucket.id
        assert second.key == 'test'
        assert second.version_id != first.version_id
        assert second.file_id is None
        assert second.is_head is True
        assert second.bucket == bucket

        # Set fake location
        second.set_location("file:///tmp/obj2", 2, "checksum")

        # A version with no location acts as a delete marker.
        marker = ObjectVersion.create(bucket, "deleted_obj")

    # Object __repr__
    expected_repr = "{0}:{1}:{2}".format(
        first.bucket_id, first.version_id, first.key)
    assert str(first) == expected_repr

    # Sanity check
    assert ObjectVersion.query.count() == 3

    # Fetching by explicit version id returns that exact version.
    fetched = ObjectVersion.get(bucket.id, "test",
                                version_id=first.version_id)
    assert fetched.version_id == first.version_id
    assert fetched.is_head is False
    fetched = ObjectVersion.get(bucket.id, "test",
                                version_id=second.version_id)
    assert fetched.version_id == second.version_id
    assert fetched.is_head is True
    # Fetching without a version id returns the head (second) version.
    fetched = ObjectVersion.get(bucket.id, "test")
    assert fetched.version_id == second.version_id
    assert fetched.is_head is True

    # The delete marker is hidden unless its version id is specified.
    assert ObjectVersion.get(bucket.id, "deleted_obj") is None
    assert ObjectVersion.get(
        bucket.id, "deleted_obj", version_id=marker.version_id) == marker
def test_object_create(app, db, dummy_location):
    """Test object creation."""
    with db.session.begin_nested():
        bkt = Bucket.create()

        # Version one of key "test".
        v1 = ObjectVersion.create(bkt, "test")
        assert v1.bucket_id == bkt.id
        assert v1.key == 'test'
        assert v1.version_id
        assert v1.file_id is None
        assert v1.is_head is True
        assert v1.bucket == bkt
        # Set fake location.
        v1.set_location("file:///tmp/obj1", 1, "checksum")

        # Version two of the same key supersedes version one as head.
        v2 = ObjectVersion.create(bkt, "test")
        assert v2.bucket_id == bkt.id
        assert v2.key == 'test'
        assert v2.version_id != v1.version_id
        assert v2.file_id is None
        assert v2.is_head is True
        assert v2.bucket == bkt
        # Set fake location
        v2.set_location("file:///tmp/obj2", 2, "checksum")

        # Version with no location on another key: a delete marker.
        v3 = ObjectVersion.create(bkt, "deleted_obj")

    # Object __repr__
    assert str(v1) == "{0}:{1}:{2}".format(
        v1.bucket_id, v1.version_id, v1.key)

    # Sanity check
    assert ObjectVersion.query.count() == 3

    # v2 is the head; v1 is retrievable by version id but is not head.
    got = ObjectVersion.get(bkt.id, "test", version_id=v1.version_id)
    assert got.version_id == v1.version_id
    assert got.is_head is False
    got = ObjectVersion.get(bkt.id, "test", version_id=v2.version_id)
    assert got.version_id == v2.version_id
    assert got.is_head is True
    # Getting the latest version returns v2.
    got = ObjectVersion.get(bkt.id, "test")
    assert got.version_id == v2.version_id
    assert got.is_head is True

    # The delete marker is invisible without its version id...
    assert ObjectVersion.get(bkt.id, "deleted_obj") is None
    # ...but retrievable with it.
    assert ObjectVersion.get(
        bkt.id, "deleted_obj", version_id=v3.version_id) == v3
def objects(db, bucket):
    """Yield two committed objects ('LICENSE', 'README.rst') in the bucket."""
    license_obj = ObjectVersion.create(
        bucket, 'LICENSE', stream=BytesIO(b('license file')))
    readme_obj = ObjectVersion.create(
        bucket, 'README.rst', stream=BytesIO(b('readme file')))
    db.session.commit()

    yield [license_obj, readme_obj]
Exemple #23
0
def attach_file(file_id, pid_type1, pid_value1, key1, pid_type2, pid_value2,
                key2):
    """Attach a file to a record or deposit.

    You must provide the information which will determine the first file, i.e.:
    either 'file-id' OR 'pid-type1', 'pid-value1' and 'key1'.
    Additionally you need to specify the information on the target
    record/deposit, i.e.: 'pid-type2', 'pid-value2' and 'key2'.
    """
    # Sanity checks on the CLI argument combinations.
    assert ((file_id or (pid_type1 and pid_value1 and key1))
            and (pid_type2 and pid_value2 and key2))

    valid_types = ('recid', 'depid', )
    msg = u"PID type must be 'recid' or 'depid'."
    if pid_type1:
        assert pid_type1 in valid_types, msg
    assert pid_type2 in valid_types, msg

    # Resolve the source file id when it was not given directly.
    if not file_id:
        src_resolver = (record_resolver if pid_type1 == 'recid'
                        else deposit_resolver)
        _, src_record = src_resolver.resolve(pid_value1)
        src_obj = ObjectVersion.get(src_record.files.bucket, key1)
        if src_obj is None:
            click.echo(click.style(u'File with key "{key}" not found.'.format(
                key=key1), fg='red'))
            return
        file_id = src_obj.file.id

    # Resolve the target record/deposit and its bucket.
    dst_resolver = (record_resolver if pid_type2 == 'recid'
                    else deposit_resolver)
    _, dst_record = dst_resolver.resolve(pid_value2)
    dst_bucket = dst_record.files.bucket

    # Refuse to overwrite an existing object with the same key.
    if ObjectVersion.get(dst_bucket, key2) is not None:
        click.echo(click.style(u'File with key "{key}" already exists on'
                               u' bucket {bucket}.'.format(
                                   key=key2, bucket=dst_bucket.id), fg='red'))
        return

    prompt = (u'Attaching file "{file_id}" to bucket {bucket2}'
              u' as "{key2}". Continue?'.format(
                  file_id=file_id, key2=key2,
                  bucket2=dst_bucket.id))
    if not click.confirm(prompt):
        click.echo(click.style(u'Aborted file attaching.', fg='green'))
        return

    # Temporarily unlock the bucket so the object version can be created;
    # record buckets are re-locked afterwards, deposit buckets stay open.
    dst_record.files.bucket.locked = False

    ObjectVersion.create(dst_bucket, key2, _file_id=file_id)
    if pid_type2 == 'recid':
        dst_record.files.bucket.locked = True
    dst_record.files.flush()
    dst_record.commit()
    db.session.commit()
    click.echo(click.style(u'File attached successfully.', fg='green'))
Exemple #24
0
def attach_file(file_id, pid_type1, pid_value1, key1, pid_type2, pid_value2,
                key2):
    """Attach a file to a record or deposit.

    You must provide the information which will determine the first file, i.e.:
    either 'file-id' OR 'pid-type1', 'pid-value1' and 'key1'.
    Additionally you need to specify the information on the target
    record/deposit, i.e.: 'pid-type2', 'pid-value2' and 'key2'.
    """
    # NOTE: `assert` statements are stripped under `python -O`; these are
    # sanity checks for CLI usage, not hard validation.
    assert ((file_id or (pid_type1 and pid_value1 and key1))
            and (pid_type2 and pid_value2 and key2))

    msg = u"PID type must be 'recid' or 'depid'."
    if pid_type1:
        assert pid_type1 in ('recid', 'depid', ), msg
    assert pid_type2 in ('recid', 'depid', ), msg

    # Resolve the source file id from (pid_type1, pid_value1, key1) when it
    # was not given directly on the command line.
    if not file_id:
        resolver = record_resolver if pid_type1 == 'recid' \
            else deposit_resolver
        pid1, record1 = resolver.resolve(pid_value1)
        bucket1 = record1.files.bucket

        obj1 = ObjectVersion.get(bucket1, key1)
        if obj1 is None:
            click.echo(click.style(u'File with key "{key}" not found.'.format(
                key=key1), fg='red'))
            return
        file_id = obj1.file.id

    # Resolve the target record/deposit and refuse to overwrite an existing
    # object stored under the same key.
    resolver = record_resolver if pid_type2 == 'recid' else deposit_resolver
    pid2, record2 = resolver.resolve(pid_value2)
    bucket2 = record2.files.bucket

    obj2 = ObjectVersion.get(bucket2, key2)
    if obj2 is not None:
        click.echo(click.style(u'File with key "{key}" already exists on'
                               u' bucket {bucket}.'.format(
                                   key=key2, bucket=bucket2.id), fg='red'))
        return

    if click.confirm(u'Attaching file "{file_id}" to bucket {bucket2}'
                     u' as "{key2}". Continue?'.format(
                         file_id=file_id, key2=key2,
                         bucket2=bucket2.id)):
        # Temporarily unlock the bucket so the new object version can be
        # created; only record ('recid') buckets are re-locked afterwards —
        # deposit buckets presumably stay unlocked by design (confirm).
        record2.files.bucket.locked = False

        ObjectVersion.create(bucket2, key2, _file_id=file_id)
        if pid_type2 == 'recid':
            record2.files.bucket.locked = True
        record2.files.flush()
        record2.commit()
        db.session.commit()
        click.echo(click.style(u'File attached successfully.', fg='green'))
    else:
        click.echo(click.style(u'Aborted file attaching.', fg='green'))
def test_object_create_with_fileid(app, db, dummy_location):
    """A new version created with an existing file doubles the bucket size."""
    with db.session.begin_nested():
        bucket = Bucket.create()
        first = ObjectVersion.create(bucket, 'test', stream=BytesIO(b'test'))

    # The 4-byte stream is accounted in the bucket size.
    assert bucket.size == 4

    # Reusing the same file for a second version counts its size again.
    ObjectVersion.create(bucket, 'test', _file_id=first.file)
    assert bucket.size == 8
Exemple #26
0
def create_b2safe_file(external_pids, bucket):
    """Create a FileInstance which contains a PID in its uri.

    :param external_pids: list of dicts, each with an ``ePIC_PID`` handle and
        a ``key`` (object name) entry; bare handles are normalized to full
        ``http://hdl.handle.net/`` URLs in place.
    :param bucket: bucket in which the object versions are created.
    :raises InvalidDepositError: on duplicate keys, keys starting with ``/``,
        or when the file URI already exists (integrity error).
    """
    validate_schema(
        external_pids, {
            'type': 'array',
            'items': {
                'type': 'object',
                'properties': {
                    'ePIC_PID': {
                        'type': 'string'
                    },
                    'key': {
                        'type': 'string'
                    }
                },
                'additionalProperties': False,
                'required': ['ePIC_PID', 'key']
            }
        })

    # Reject duplicate keys early: each key maps to one object version.
    keys_list = [e['key'] for e in external_pids]
    keys_set = set(keys_list)
    if len(keys_list) != len(keys_set):
        raise InvalidDepositError([
            FieldError('external_pids',
                       'Field external_pids contains duplicate keys.')
        ])
    for external_pid in external_pids:
        # Normalize bare handles into full handle-resolver URLs.
        if not external_pid['ePIC_PID'].startswith('http://hdl.handle.net/'):
            external_pid['ePIC_PID'] = 'http://hdl.handle.net/' + \
                external_pid['ePIC_PID']
        if external_pid['key'].startswith('/'):
            raise InvalidDepositError([
                FieldError('external_pids',
                           'File key cannot start with a "/".')
            ])
        try:
            # Create the file instance if it does not already exist.
            file_instance = FileInstance.get_by_uri(external_pid['ePIC_PID'])
            if file_instance is None:
                file_instance = FileInstance.create()
                file_instance.set_uri(external_pid['ePIC_PID'],
                                      1,
                                      0,
                                      storage_class='B')
            assert file_instance.storage_class == 'B'
            # Add the file to the bucket if it is not already in it.
            current_version = ObjectVersion.get(bucket, external_pid['key'])
            if not current_version or \
                    current_version.file_id != file_instance.id:
                # Pass the file id by keyword (matching the convention used
                # elsewhere in this file) instead of relying on positional
                # argument order of ObjectVersion.create().
                ObjectVersion.create(bucket, external_pid['key'],
                                     _file_id=file_instance.id)
        except IntegrityError as e:
            # Chain the original error so the root cause is preserved.
            raise InvalidDepositError([
                FieldError('external_pids', 'File URI already exists.')
            ]) from e
def test_object_set_contents(app, db, dummy_location):
    """Test object set contents."""
    with db.session.begin_nested():
        b1 = Bucket.create()
        obj = ObjectVersion.create(b1, "LICENSE")
        # A freshly created object version has no FileInstance yet.
        assert obj.file_id is None
        assert FileInstance.query.count() == 0

        # Save a file.
        with open('LICENSE', 'rb') as fp:
            obj.set_contents(fp)

    # Assert size, location and checksum
    assert obj.file_id is not None
    assert obj.file.uri is not None
    assert obj.file.size == getsize('LICENSE')
    assert obj.file.checksum is not None
    assert b1.size == obj.file.size

    # Try to overwrite — contents are immutable once set on a version.
    with db.session.begin_nested():
        with open('LICENSE', 'rb') as fp:
            pytest.raises(FileInstanceAlreadySetError, obj.set_contents, fp)

    # Save a new version with different content
    with db.session.begin_nested():
        obj2 = ObjectVersion.create(b1, "LICENSE")
        with open('README.rst', 'rb') as fp:
            obj2.set_contents(fp)

    # The new version gets its own FileInstance; bucket size accounts for
    # both stored versions.
    assert obj2.file_id is not None and obj2.file_id != obj.file_id
    assert obj2.file.size == getsize('README.rst')
    assert obj2.file.uri != obj.file.uri
    assert Bucket.get(b1.id).size == obj.file.size + obj2.file.size

    # A successful verification records the timestamp and result.
    obj2.file.verify_checksum()
    assert obj2.file.last_check_at
    assert obj2.file.last_check is True
    old_checksum = obj2.file.checksum
    obj2.file.checksum = "md5:invalid"
    assert obj2.file.verify_checksum() is False

    # A missing file makes verification raise without touching the
    # previously recorded check state.
    previous_last_check = obj2.file.last_check
    previous_last_check_date = obj2.file.last_check_at
    with db.session.begin_nested():
        obj2.file.checksum = old_checksum
        obj2.file.uri = 'invalid'
    pytest.raises(ResourceNotFoundError, obj2.file.verify_checksum)
    assert obj2.file.last_check == previous_last_check
    assert obj2.file.last_check_at == previous_last_check_date

    # With throws=False the failure is swallowed and recorded as an
    # unknown ("None") check result with a fresh timestamp.
    obj2.file.verify_checksum(throws=False)
    assert obj2.file.last_check is None
    assert obj2.file.last_check_at != previous_last_check_date
def test_deposit_vtt_tags(api_app, db, api_project, users):
    """Test VTT tag generation."""
    project, video_1, video_2 = api_project
    video_1_depid = video_1['_deposit']['id']

    # insert a master file inside the video
    add_master_to_video(
        video_deposit=video_1,
        filename='test.mp4',
        stream=BytesIO(b'1234'), video_duration="15"
    )
    # try to insert a new vtt object
    obj = ObjectVersion.create(
        video_1._bucket, key="test_fr.vtt",
        stream=BytesIO(b'hello'))
    # publish the video
    prepare_videos_for_publish([video_1])
    video_1 = deposit_video_resolver(video_1_depid)
    login_user(User.query.get(users[0]))
    video_1 = video_1.publish()

    # check tags — language presumably derived from the "_fr" key suffix;
    # confirm against check_object_tags' implementation.
    check_object_tags(obj, video_1, content_type='vtt', media_type='subtitle',
                      context_type='subtitle', language='fr')

    # edit the video
    video_1 = video_1.edit()

    # try to delete the old vtt file and substitute with a new one
    video_1 = deposit_video_resolver(video_1_depid)
    ObjectVersion.delete(bucket=video_1._bucket, key=obj.key)
    obj2 = ObjectVersion.create(
        video_1._bucket, key="test_en.vtt", stream=BytesIO(b'hello'))

    # publish again the video
    video_1 = video_1.publish()

    # check tags for the replacement subtitle
    check_object_tags(obj2, video_1, content_type='vtt', media_type='subtitle',
                      context_type='subtitle', language='en')

    # edit a re-published video
    video_1 = video_1.edit()

    # add a new vtt file
    obj3 = ObjectVersion.create(
        video_1._bucket, key="test_it.vtt", stream=BytesIO(b'hello'))

    # publish again the video
    video_1 = video_1.publish()

    # check tags for the newly added subtitle
    check_object_tags(obj3, video_1, content_type='vtt', media_type='subtitle',
                      context_type='subtitle', language='it')
def test_object_set_contents(app, db, dummy_location):
    """Test object set contents."""
    with db.session.begin_nested():
        b1 = Bucket.create()
        obj = ObjectVersion.create(b1, "LICENSE")
        # A freshly created object version has no FileInstance yet.
        assert obj.file_id is None
        assert FileInstance.query.count() == 0

        # Save a file.
        with open('LICENSE', 'rb') as fp:
            obj.set_contents(fp)

    # Assert size, location and checksum
    assert obj.file_id is not None
    assert obj.file.uri is not None
    assert obj.file.size == getsize('LICENSE')
    assert obj.file.checksum is not None
    assert b1.size == obj.file.size

    # Try to overwrite — contents are immutable once set on a version.
    with db.session.begin_nested():
        with open('LICENSE', 'rb') as fp:
            pytest.raises(FileInstanceAlreadySetError, obj.set_contents, fp)

    # Save a new version with different content
    with db.session.begin_nested():
        obj2 = ObjectVersion.create(b1, "LICENSE")
        with open('README.rst', 'rb') as fp:
            obj2.set_contents(fp)

    # The new version gets its own FileInstance; bucket size accounts for
    # both stored versions.
    assert obj2.file_id is not None and obj2.file_id != obj.file_id
    assert obj2.file.size == getsize('README.rst')
    assert obj2.file.uri != obj.file.uri
    assert Bucket.get(b1.id).size == obj.file.size + obj2.file.size

    # A successful verification records the timestamp and result.
    obj2.file.verify_checksum()
    assert obj2.file.last_check_at
    assert obj2.file.last_check is True
    old_checksum = obj2.file.checksum
    obj2.file.checksum = "md5:invalid"
    assert obj2.file.verify_checksum() is False

    # A missing file makes verification raise without touching the
    # previously recorded check state.
    previous_last_check = obj2.file.last_check
    previous_last_check_date = obj2.file.last_check_at
    with db.session.begin_nested():
        obj2.file.checksum = old_checksum
        obj2.file.uri = 'invalid'
    pytest.raises(ResourceNotFoundError, obj2.file.verify_checksum)
    assert obj2.file.last_check == previous_last_check
    assert obj2.file.last_check_at == previous_last_check_date

    # With throws=False the failure is swallowed and recorded as an
    # unknown ("None") check result with a fresh timestamp.
    obj2.file.verify_checksum(throws=False)
    assert obj2.file.last_check is None
    assert obj2.file.last_check_at != previous_last_check_date
Exemple #30
0
def _create_bucket(deposit, record_json, directory, logfile):
    """Load previously downloaded files into the deposit's bucket.

    Files are expected at ``<directory>/file_<index>`` (one per entry in
    ``record_json['files']``). Each file's on-disk size is checked against
    the recorded size; mismatches are logged to *logfile* and skipped.
    """
    for index, file_dict in enumerate(record_json.get('files', [])):
        click.secho('    Load file "{}"'.format(file_dict.get('name')))
        filepath = os.path.join(directory, 'file_{}'.format(index))
        # Read the size once; the original called getsize() twice, which
        # could report inconsistent values for a file being written.
        actual_size = os.path.getsize(filepath)
        if int(actual_size) != int(file_dict.get('size')):
            logfile.write("\n********************")
            logfile.write("\nERROR: downloaded file size differs for file {}: {} instead of {}"
                          .format(filepath, actual_size, file_dict.get('size')))
            logfile.write("\n********************")
        else:
            # Stream the file handle directly instead of copying the whole
            # content into an in-memory BytesIO buffer.
            with open(filepath, 'rb') as f:
                ObjectVersion.create(deposit.files.bucket, file_dict['name'],
                                     stream=f)
def test_bucket_sync_new_object(app, db, dummy_location):
    """A new object in the source bucket is synced to the destination."""
    src = Bucket.create()
    dest = Bucket.create()
    ObjectVersion.create(src, "filename").set_location("b1v1", 1, "achecksum")
    db.session.commit()

    # Before syncing, only the source bucket holds the object.
    assert ObjectVersion.get_by_bucket(src).count() == 1
    assert ObjectVersion.get_by_bucket(dest).count() == 0

    src.sync(dest)

    # After syncing, the object is present in both buckets.
    assert ObjectVersion.get_by_bucket(src).count() == 1
    assert ObjectVersion.get_by_bucket(dest).count() == 1
    assert ObjectVersion.get(dest, "filename")
def test_object_version_tags(app, db, dummy_location):
    """Test object version tags."""
    f = FileInstance(uri="f1", size=1, checksum="mychecksum")
    db.session.add(f)
    db.session.commit()
    b = Bucket.create()
    obj1 = ObjectVersion.create(b, "test").set_file(f)
    ObjectVersionTag.create(obj1, "mykey", "testvalue")
    ObjectVersionTag.create(obj1, "another_key", "another value")
    db.session.commit()

    # Duplicate key — (object, key) pairs must be unique.
    pytest.raises(
        IntegrityError, ObjectVersionTag.create, obj1, "mykey", "newvalue")

    # Test get — both the object and its raw version_id are accepted.
    assert ObjectVersionTag.query.count() == 2
    assert ObjectVersionTag.get(obj1, "mykey").value == "testvalue"
    assert ObjectVersionTag.get_value(obj1.version_id, "another_key") \
        == "another value"
    assert ObjectVersionTag.get_value(obj1, "invalid") is None

    # Test delete — deleting an unknown key is a silent no-op.
    ObjectVersionTag.delete(obj1, "mykey")
    assert ObjectVersionTag.query.count() == 1
    ObjectVersionTag.delete(obj1, "invalid")
    assert ObjectVersionTag.query.count() == 1

    # Create or update
    ObjectVersionTag.create_or_update(obj1, "another_key", "newval")
    ObjectVersionTag.create_or_update(obj1.version_id, "newkey", "testval")
    db.session.commit()
    assert ObjectVersionTag.get_value(obj1, "another_key") == "newval"
    assert ObjectVersionTag.get_value(obj1, "newkey") == "testval"

    # Get tags as dictionary
    assert obj1.get_tags() == dict(another_key="newval", newkey="testval")
    obj2 = ObjectVersion.create(b, 'test2')
    assert obj2.get_tags() == dict()

    # Copy object version — tags are duplicated onto the copy.
    obj_copy = obj1.copy()
    db.session.commit()
    assert obj_copy.get_tags() == dict(another_key="newval", newkey="testval")
    assert ObjectVersionTag.query.count() == 4

    # Cascade delete — removing object versions removes their tags.
    ObjectVersion.query.delete()
    db.session.commit()
    assert ObjectVersionTag.query.count() == 0
def test_object_mimetype(app, db, dummy_location):
    """MIME type is guessed from the key and can be overridden manually."""
    bucket = Bucket.create()
    db.session.commit()
    pdf_obj = ObjectVersion.create(
        bucket, "test.pdf", stream=BytesIO(b'pdfdata'))
    plain_obj = ObjectVersion.create(
        bucket, "README", stream=BytesIO(b'pdfdata'))

    # The MIME type is derived from the key's extension, falling back to
    # the generic octet-stream type when there is none.
    assert pdf_obj.mimetype == "application/pdf"
    assert plain_obj.mimetype == "application/octet-stream"

    # Override computed MIME type and check it persists.
    plain_obj.mimetype = "text/plain"
    db.session.commit()
    assert ObjectVersion.get(bucket, "README").mimetype == "text/plain"
Exemple #34
0
def files():
    """Load files."""
    srcroot = dirname(dirname(__file__))
    d = current_app.config['DATADIR']
    if exists(d):
        shutil.rmtree(d)
    makedirs(d)

    # Clear data
    Part.query.delete()
    MultipartObject.query.delete()
    ObjectVersion.query.delete()
    Bucket.query.delete()
    FileInstance.query.delete()
    Location.query.delete()
    db.session.commit()

    # Create location
    loc = Location(name='local', uri=d, default=True)
    db.session.add(loc)
    db.session.commit()

    # Bucket 0 — two distinct objects, one version each.
    b1 = Bucket.create(loc)
    b1.id = '00000000-0000-0000-0000-000000000000'
    for f in ['README.rst', 'LICENSE']:
        with open(join(srcroot, f), 'rb') as fp:
            ObjectVersion.create(b1, f, stream=fp)

    # Bucket 1 — AUTHORS.rst gets two versions (the second is the head).
    b2 = Bucket.create(loc)
    b2.id = '11111111-1111-1111-1111-111111111111'
    k = 'AUTHORS.rst'
    with open(join(srcroot, 'CHANGES.rst'), 'rb') as fp:
        ObjectVersion.create(b2, k, stream=fp)
    with open(join(srcroot, 'AUTHORS.rst'), 'rb') as fp:
        ObjectVersion.create(b2, k, stream=fp)

    # RELEASE-NOTES.rst gets two versions and is then topped with a
    # delete marker.
    k = 'RELEASE-NOTES.rst'
    with open(join(srcroot, 'RELEASE-NOTES.rst'), 'rb') as fp:
        ObjectVersion.create(b2, k, stream=fp)
    with open(join(srcroot, 'CHANGES.rst'), 'rb') as fp:
        ObjectVersion.create(b2, k, stream=fp)
    ObjectVersion.delete(b2.id, k)

    # Bucket 2 — empty bucket (note: the b2 name is reused here).
    b2 = Bucket.create(loc)
    b2.id = '22222222-2222-2222-2222-222222222222'

    db.session.commit()
Exemple #35
0
def test_deposit_poster_tags(api_app, db, api_project, users):
    """Test poster tag generation."""
    project, video_1, video_2 = api_project
    video_1_depid = video_1['_deposit']['id']
    master_video_filename = 'test.mp4'
    poster_filename = 'poster.jpg'
    poster_filename2 = 'poster.png'

    # insert a master file inside the video
    add_master_to_video(video_deposit=video_1,
                        filename=master_video_filename,
                        stream=BytesIO(b'1234'),
                        video_duration='15')
    # insert a poster image object
    obj = ObjectVersion.create(video_1._bucket,
                               key=poster_filename,
                               stream=BytesIO(b'hello'))
    # publish the video
    prepare_videos_for_publish([video_1])
    video_1 = deposit_video_resolver(video_1_depid)
    login_user(User.query.get(users[0]))
    video_1 = video_1.publish()

    # check tags — content_type presumably derived from the file
    # extension; confirm against check_object_tags' implementation.
    check_object_tags(obj,
                      video_1,
                      content_type='jpg',
                      context_type='poster',
                      media_type='image')

    # edit the video
    video_1 = video_1.edit()

    # try to delete the old poster frame and substitute with a new one
    video_1 = deposit_video_resolver(video_1_depid)
    ObjectVersion.delete(bucket=video_1._bucket, key=obj.key)
    obj2 = ObjectVersion.create(video_1._bucket,
                                key=poster_filename2,
                                stream=BytesIO(b'hello'))

    # publish again the video
    video_1 = video_1.publish()

    # check tags for the replacement poster
    check_object_tags(obj2,
                      video_1,
                      content_type='png',
                      context_type='poster',
                      media_type='image')
Exemple #36
0
def files():
    """Load demo files into fresh buckets under a clean default location."""
    srcroot = dirname(dirname(__file__))
    d = current_app.config['DATADIR']
    if exists(d):
        shutil.rmtree(d)
    makedirs(d)

    # Clear data
    Part.query.delete()
    MultipartObject.query.delete()
    ObjectVersion.query.delete()
    Bucket.query.delete()
    FileInstance.query.delete()
    Location.query.delete()
    db.session.commit()

    # Create location.
    # BUG FIX: the Location was never added to the session, so the commit
    # below persisted nothing and the buckets referenced a transient
    # location (compare with the other files() variant in this file).
    loc = Location(name='local', uri=d, default=True)
    db.session.add(loc)
    db.session.commit()

    # Bucket 0 — two distinct objects, one version each.
    b1 = Bucket.create(loc)
    b1.id = '00000000-0000-0000-0000-000000000000'
    for f in ['README.rst', 'LICENSE']:
        with open(join(srcroot, f), 'rb') as fp:
            ObjectVersion.create(b1, f, stream=fp)

    # Bucket 1 — AUTHORS.rst gets two versions (the second is the head).
    b2 = Bucket.create(loc)
    b2.id = '11111111-1111-1111-1111-111111111111'
    k = 'AUTHORS.rst'
    with open(join(srcroot, 'CHANGES.rst'), 'rb') as fp:
        ObjectVersion.create(b2, k, stream=fp)
    with open(join(srcroot, 'AUTHORS.rst'), 'rb') as fp:
        ObjectVersion.create(b2, k, stream=fp)

    # RELEASE-NOTES.rst gets two versions and is then topped with a
    # delete marker.
    k = 'RELEASE-NOTES.rst'
    with open(join(srcroot, 'RELEASE-NOTES.rst'), 'rb') as fp:
        ObjectVersion.create(b2, k, stream=fp)
    with open(join(srcroot, 'CHANGES.rst'), 'rb') as fp:
        ObjectVersion.create(b2, k, stream=fp)
    ObjectVersion.delete(b2.id, k)

    # Bucket 2 — empty bucket.
    b2 = Bucket.create(loc)
    b2.id = '22222222-2222-2222-2222-222222222222'

    db.session.commit()
def save_and_validate_logo(logo_stream, logo_filename, community_id):
    """Validate if communities logo is in limit size and save it."""
    cfg = current_app.config

    logos_bucket = Bucket.query.get(cfg['COMMUNITIES_BUCKET_UUID'])

    # Extract the extension without its leading dot.
    ext = os.path.splitext(logo_filename)[1]
    if ext.startswith('.'):
        ext = ext[1:]

    # Reject unsupported extensions.
    if ext not in cfg['COMMUNITIES_LOGO_EXTENSIONS']:
        return None

    key = "{0}/logo.{1}".format(community_id, ext)
    ObjectVersion.create(logos_bucket, key, stream=logo_stream)
    return ext
def objects(db, bucket):
    """Fixture: yield two object versions created with explicit sizes."""
    created = []
    for key, raw in (('LICENSE', 'license file'),
                     ('README.rst', 'readme file')):
        payload = b(raw)
        created.append(ObjectVersion.create(
            bucket, key, stream=BytesIO(payload), size=len(payload)))
    db.session.commit()

    yield created
Exemple #39
0
def save_and_validate_logo(logo_stream, logo_filename, community_id):
    """Validate if communities logo is in limit size and save it.

    NOTE(review): despite the summary, no size check happens here — only
    the file extension is validated; confirm whether size limiting is
    enforced elsewhere (e.g. by the upload handler).

    :param logo_stream: file-like object with the logo content.
    :param logo_filename: original filename (used only for its extension).
    :param community_id: community identifier used to build the object key.
    :returns: the extension (without dot) on success, ``None`` when the
        extension is not allowed.
    """
    cfg = current_app.config

    logos_bucket_id = cfg['COMMUNITIES_BUCKET_UUID']
    logos_bucket = Bucket.query.get(logos_bucket_id)
    # Extension without its leading dot; comparison is case-sensitive.
    ext = os.path.splitext(logo_filename)[1]
    ext = ext[1:] if ext.startswith('.') else ext

    if ext in cfg['COMMUNITIES_LOGO_EXTENSIONS']:
        key = "{0}/logo.{1}".format(community_id, ext)
        ObjectVersion.create(logos_bucket, key, stream=logo_stream)
        return ext
    else:
        return None
Exemple #40
0
    def save_file(self, content, filename, size, failed=False):
        """Save file with given content in deposit bucket.

           If downloading a content failed, file will be still created,
           with tag `failed`.

           :param content: stream
           :param filename: name that file will be saved with
           :param size: size of content
           :param failed: if failed during downloading the content
           :returns: the created ObjectVersion.
        """
        # Register an (empty) object version with a blank FileInstance so
        # the file exists in the deposit even when the download failed.
        obj = ObjectVersion.create(bucket=self.files.bucket, key=filename)
        obj.file = FileInstance.create()
        self.files.flush()

        if not failed:
            # Store the actual content in the deposit bucket's location.
            self.files[filename].file.set_contents(
                content,
                default_location=self.files.bucket.location.uri,
                size=size)

            print('File {} saved ({}b).\n'.format(filename, size))
        else:
            # Tag the object so consumers can detect failed downloads.
            ObjectVersionTag.create(object_version=obj,
                                    key='status',
                                    value='failed')
            print('File {} not saved.\n'.format(filename))

        self.files.flush()
        db.session.commit()

        return obj
def test_pyfilesystemstorage(app, db, dummy_location):
    """Test pyfs storage: save() reports correct checksum, size and URI."""
    # Create bucket and object
    with db.session.begin_nested():
        b = Bucket.create()
        obj = ObjectVersion.create(b, "LICENSE")
        obj.file = FileInstance()
        db.session.add(obj.file)

    storage = PyFilesystemStorage(obj, obj.file)
    with open('LICENSE', 'rb') as fp:
        loc, size, checksum = storage.save(fp)

    # Verify checksum, size and location.
    with open('LICENSE', 'rb') as fp:
        m = hashlib.md5()
        m.update(fp.read())
        assert "md5:{0}".format(m.hexdigest()) == checksum

    # FIX: the original asserted the size twice in a row; once is enough.
    assert size == getsize('LICENSE')
    assert loc == \
        join(
            dummy_location.uri,
            str(b.id),
            str(obj.version_id),
            "data")
Exemple #42
0
    def _copy_file(self, source_record, ov, target_record, file_md,
                   source_record_context, target_record_context):
        """Copy one object version (and its tags) into the target record.

        Sends the ``file_copied`` signal first; any receiver returning
        ``False`` vetoes the copy. Updates *file_md* in place with the new
        bucket/file/version ids.

        :returns: ``True`` when the file was copied, ``False`` when skipped.
        """
        bucket = target_record.bucket
        new_ob = ObjectVersion.create(bucket, ov.key, _file_id=ov.file_id)

        tags = {tag.key: tag.value for tag in ov.tags}
        for _, res in file_copied.send(
                source_record,
                source_record=source_record,
                target_record=target_record,
                object_version=ov,
                tags=tags,
                metadata=file_md,
                source_record_context=source_record_context,
                target_record_context=target_record_context):
            if res is False:
                return False  # skip this file

        # BUG FIX: iterate over (key, value) pairs. Iterating the dict
        # directly yields only keys, so unpacking a string key into
        # (key, value) raised ValueError and no tags were ever copied.
        for key, value in tags.items():
            ObjectVersionTag.create_or_update(object_version=new_ob,
                                              key=key,
                                              value=value)

        file_md['bucket'] = str(bucket.id)
        file_md['file_id'] = str(new_ob.file_id)
        file_md['version_id'] = str(new_ob.version_id)

        return True
def test_verify_checksum(app, db, dummy_location):
    """Test celery tasks for checksum verification."""
    b1 = Bucket.create()
    with open('README.rst', 'rb') as fp:
        obj = ObjectVersion.create(b1, 'README.rst', stream=fp)
    db.session.commit()
    file_id = obj.file_id

    # A successful verification records a positive check.
    verify_checksum(str(file_id))

    f = FileInstance.query.get(file_id)
    assert f.last_check_at
    assert f.last_check is True

    # Point the file at a non-existing URI to force failures below.
    f.uri = 'invalid'
    db.session.add(f)
    db.session.commit()
    # With throws=True the task propagates the error and the previous
    # check result is left untouched.
    pytest.raises(ResourceNotFoundError, verify_checksum, str(file_id),
                  throws=True)

    f = FileInstance.query.get(file_id)
    assert f.last_check is True

    # With throws=False the failure is recorded as an unknown result.
    verify_checksum(str(file_id), throws=False)
    f = FileInstance.query.get(file_id)
    assert f.last_check is None

    # In pessimistic mode the error propagates and the check result is
    # reset to unknown.
    f.last_check = True
    db.session.add(f)
    db.session.commit()
    with pytest.raises(ResourceNotFoundError):
        verify_checksum(str(file_id), pessimistic=True)
    f = FileInstance.query.get(file_id)
    assert f.last_check is None
Exemple #44
0
def handle_record_files(data, bucket, files, skip_files):
    """Handles record files.

    For each file descriptor, creates a FileInstance pointing at the given
    URI and an ObjectVersion in *bucket*, then updates the descriptor in
    place with the resulting bucket/key/version ids. Failures are reported
    per-file and do not abort the loop.

    :param data: record metadata (only ``recid`` is read, for error output).
    :param bucket: target bucket for the object versions.
    :param files: list of file dicts with 'uri', 'size' and 'checksum'.
    :param skip_files: when truthy, skip file handling entirely.
    """
    for file in files:
        if skip_files:
            break
        assert 'uri' in file
        assert 'size' in file
        assert 'checksum' in file

        # BUG FIX: compute the filename before entering the try block. It
        # was previously assigned after FileInstance.create(), so an early
        # failure made the except handler raise NameError on 'filename'
        # instead of reporting the real error.
        filename = file.get("uri").split('/')[-1:][0]
        try:
            f = FileInstance.create()
            f.set_uri(file.get("uri"), file.get(
                "size"), file.get("checksum"))
            obj = ObjectVersion.create(
                bucket,
                filename,
                _file_id=f.id
            )

            file.update({
                'bucket': str(obj.bucket_id),
                'checksum': obj.file.checksum,
                'key': obj.key,
                'version_id': str(obj.version_id),
            })

        except Exception as e:
            click.echo(
                'Recid {0} file {1} could not be loaded due '
                'to {2}.'.format(data.get('recid'), filename,
                                 str(e)))
            continue
def test_SIP_files(db):
    """Test the files methods of API SIP."""
    # we create a SIP model
    sip = SIP_.create()
    db.session.commit()
    # We create an API SIP on top of it
    api_sip = SIP(sip)
    assert len(api_sip.files) == 0
    # we setup a file storage in a temporary directory
    tmppath = tempfile.mkdtemp()
    db.session.add(Location(name='default', uri=tmppath, default=True))
    db.session.commit()
    # we create a file
    content = b'test lol\n'
    bucket = Bucket.create()
    obj = ObjectVersion.create(bucket, 'test.txt', stream=BytesIO(content))
    db.session.commit()
    # we attach it to the SIP — visible both via the API wrapper and the
    # underlying model
    sf = api_sip.attach_file(obj)
    db.session.commit()
    assert len(api_sip.files) == 1
    assert api_sip.files[0].filepath == 'test.txt'
    assert sip.sip_files[0].filepath == 'test.txt'
    # finalization: remove the temporary storage directory
    rmtree(tmppath)
def test_pyfilesystemstorage(app, db, dummy_location):
    """Test pyfs storage."""
    # Create bucket and object
    with db.session.begin_nested():
        b1 = Bucket.create()
        obj = ObjectVersion.create(b1, "LICENSE")
        obj.file = FileInstance.create()

    storage = PyFilesystemStorage(obj.file, base_uri=obj.bucket.location.uri)
    counter = dict(size=0)

    # Progress callback invoked by the storage during save().
    # NOTE(review): counter is updated but never asserted on — consider
    # checking counter['size'] == len(data) after save().
    def callback(total, size):
        counter['size'] = size

    data = b("this is some content")
    stream = BytesIO(data)
    loc, size, checksum = storage.save(stream, progress_callback=callback)

    # Verify checksum, size and location.
    m = hashlib.md5()
    m.update(data)
    assert "md5:{0}".format(m.hexdigest()) == checksum

    assert size == len(data)
    assert loc == join(
        dummy_location.uri,
        str(obj.file.id),
        "data")
Exemple #47
0
def get_local_file(bucket, datadir, filename):
    """Create local file as objectversion.

    :param bucket: bucket in which the object version is created.
    :param datadir: directory containing the source file.
    :param filename: name of the source file inside *datadir* (the object
        is always stored under the key "test.mp4").
    :returns: the version id of the created object version.
    """
    # FIX: use a context manager so the file handle is always closed;
    # the original opened the stream and never closed it (resource leak).
    with open(join(datadir, filename), 'rb') as stream:
        object_version = ObjectVersion.create(bucket, "test.mp4",
                                              stream=stream)
        version_id = object_version.version_id
    db.session.commit()
    return version_id
 def __setitem__(self, key, stream):
     """Add file inside a deposit.

     :param key: object key (filename) under which the stream is stored.
     :param stream: file-like object with the content to store.
     """
     with db.session.begin_nested():
         # Save the stream as a new object version in the deposit bucket.
         obj = ObjectVersion.create(
             bucket=self.bucket, key=key, stream=stream)
         # Record the file's metadata in the deposit's files map.
         self.filesmap[key] = self.file_cls(obj, {}).dumps()
         self.flush()
Exemple #49
0
 def open(self):
     """Open the bucket for writing.

     Creates a new (empty) object version under ``self.key`` — which may
     be a plain value or a callable producing the key — and commits it
     immediately.

     :returns: ``self``, to allow chained usage.
     """
     self.obj = ObjectVersion.create(
         self.bucket_id,
         self.key() if callable(self.key) else self.key
     )
     db.session.commit()
     return self
Exemple #50
0
def create_b2safe_file(external_pids, bucket):
    """Create a FileInstance which contains a PID in its uri.

    :param external_pids: list of ``{'ePIC_PID': ..., 'key': ...}`` dicts
        describing the externally-stored files.
    :param bucket: bucket the object versions are created in.
    :raises InvalidDepositError: on duplicate keys, keys starting with a
        slash, or when a file URI already exists.
    """
    validate_schema(external_pids, {
        'type': 'array',
        'items': {
            'type': 'object',
            'properties': {
                'ePIC_PID': {'type': 'string'},
                'key': {'type': 'string'}
            },
            'additionalProperties': False,
            'required': ['ePIC_PID', 'key']
        }
    })

    # Reject duplicate keys up front.
    keys_list = [e['key'] for e in external_pids]
    if len(keys_list) != len(set(keys_list)):
        raise InvalidDepositError([FieldError(
            'external_pids',
            'Field external_pids contains duplicate keys.')])
    for external_pid in external_pids:
        # Normalize bare PIDs into full handle URLs.
        if not external_pid['ePIC_PID'].startswith('http://hdl.handle.net/'):
            external_pid['ePIC_PID'] = 'http://hdl.handle.net/' + \
                external_pid['ePIC_PID']
        if external_pid['key'].startswith('/'):
            raise InvalidDepositError(
                [FieldError('external_pids',
                            'File key cannot start with a "/".')])
        try:
            # Create the file instance if it does not already exist.
            file_instance = FileInstance.get_by_uri(external_pid['ePIC_PID'])
            if file_instance is None:
                file_instance = FileInstance.create()
                file_instance.set_uri(
                    external_pid['ePIC_PID'], 1, 0, storage_class='B')
            assert file_instance.storage_class == 'B'
            # Add the file to the bucket if it is not already in it.
            current_version = ObjectVersion.get(bucket, external_pid['key'])
            if not current_version or \
                    current_version.file_id != file_instance.id:
                ObjectVersion.create(bucket, external_pid['key'],
                                     file_instance.id)
        except IntegrityError:
            # The URI is already registered to another file instance.
            # (The previously bound-but-unused ``as e`` was removed.)
            raise InvalidDepositError(
                [FieldError('external_pids', 'File URI already exists.')])
def test_object_set_file(app, db, dummy_location):
    """Test object set file."""
    bucket = Bucket.create()
    instance = FileInstance(uri="f1", size=1, checksum="mychecksum")
    obj = ObjectVersion.create(bucket, "test").set_file(instance)
    db.session.commit()
    assert obj.file == instance

    # Assigning a file to an object version that already has one must fail.
    assert pytest.raises(FileInstanceAlreadySetError, obj.set_file, instance)
Exemple #52
0
 def __setitem__(self, key, stream):
     """Add file inside a deposit."""
     with db.session.begin_nested():
         # When called during an HTTP upload, use the declared content
         # length as a size hint (falling back to None when absent/zero).
         size = None
         upload = (request.files.get('file')
                   if request and request.files else None)
         if upload:
             size = upload.content_length or None
         version = ObjectVersion.create(
             bucket=self.bucket, key=key, stream=stream, size=size)
         self.filesmap[key] = self.file_cls(version, {}).dumps()
         self.flush()
    def __setitem__(self, key, stream):
        """Add file inside a deposit.

        Stores ``stream`` as a new ObjectVersion under ``key`` in the
        deposit's bucket, then records the key in ``record['_files']``.
        """
        with db.session.begin_nested():
            # save the file as a new object version in the deposit bucket
            obj = ObjectVersion.create(bucket=self.bucket, key=key,
                                       stream=stream)

            # update deposit['_files']: register the key only once
            # NOTE(review): ``obj`` is unused in the visible span; the
            # method may continue past this excerpt — confirm upstream.
            if key not in self.record['_files']:
                self.record['_files'].append({'key': key})
Exemple #54
0
def test_object(db, bucket):
    """File system location."""
    payload = b('test object')
    # Create and persist a single object version holding the payload.
    obj = ObjectVersion.create(
        bucket, 'test.txt',
        stream=BytesIO(payload), size=len(payload))
    db.session.commit()
    return obj
Exemple #55
0
def image_object(database, location, image_path):
    """Get ObjectVersion of test image."""
    bucket = Bucket.create()
    database.session.commit()

    # Stream the image into the bucket; the handle closes with the block.
    image_size = getsize(image_path)
    with open(image_path, 'rb') as image_stream:
        obj = ObjectVersion.create(
            bucket, 'test.jpg', stream=image_stream, size=image_size)
    database.session.commit()
    return obj