Ejemplo n.º 1
0
def new_project(app,
                es,
                cds_jsonresolver,
                users,
                location,
                db,
                deposit_metadata,
                project_data=None,
                wait=None):
    """Create a project deposit with two video deposits attached.

    :param app: Application whose ``test_request_context`` is entered while
        the deposits are created.
    :param users: Sequence of user ids; the first one is logged in.
    :param db: Database handle; its session is committed at the end.
    :param deposit_metadata: Mapping merged into the project metadata and
        into the metadata of both videos.
    :param project_data: Optional base metadata for the project. When falsy,
        a default title/description is used. The caller's mapping is not
        modified.
    :param wait: Unless this is exactly ``False``, sleep two seconds before
        returning (presumably to let asynchronous indexing settle -- confirm).
    :returns: ``(project, video_1, video_2)``.

    The remaining parameters (``es``, ``cds_jsonresolver``, ``location``) are
    not used directly in the body.
    """
    # Copy before merging so the caller-supplied ``project_data`` dict is not
    # mutated as a side effect of calling this helper.
    project_data = dict(project_data or {
        'title': {
            'title': 'my project',
        },
        'description': 'in tempor reprehenderit enim eiusmod',
    })
    project_data.update(deposit_metadata)
    project_video_1 = {
        'title': {
            'title': 'video 1',
        },
        'description': 'in tempor reprehenderit enim eiusmod',
        'featured': True,
        'vr': True,
        'language': 'en',
        'date': '2017-09-25',
    }
    project_video_1.update(deposit_metadata)
    project_video_2 = {
        'title': {
            'title': 'video 2',
        },
        'description': 'in tempor reprehenderit enim eiusmod',
        'featured': False,
        'vr': False,
        'language': 'en',
        'date': '2017-09-25',
    }
    project_video_2.update(deposit_metadata)
    with app.test_request_context():
        login_user(User.query.get(users[0]))

        # create empty project
        project = Project.create(project_data).commit()

        # create videos, each pointing at the project through `_project_id`
        project_video_1['_project_id'] = project['_deposit']['id']
        project_video_2['_project_id'] = project['_deposit']['id']
        video_1 = Video.create(project_video_1)
        video_2 = Video.create(project_video_2)

        # save project and videos
        project.commit()
        video_1.commit()
        video_2.commit()

    db.session.commit()
    # `wait=None` (the default) still sleeps; only an explicit False skips it.
    if wait is not False:
        sleep(2)
    return project, video_1, video_2
Ejemplo n.º 2
0
def api_project_published(api_app, api_project):
    """Publish the project from ``api_project`` and return fresh records.

    Returns a tuple ``(published_project, video_1, video_2)`` where the two
    videos are re-fetched through ``Video.get_record`` after publication.
    """
    project, video_1, video_2 = api_project
    with api_app.test_request_context():
        prepare_videos_for_publish([video_1, video_2])
        new_project = project.publish()
        # resolve every video id of the published project to its record
        new_videos = []
        for id_ in new_project.video_ids:
            resolved = record_resolver.resolve(id_)
            new_videos.append(resolved[1])
        assert len(new_videos) == 2
    first_video = Video.get_record(new_videos[0].id)
    second_video = Video.get_record(new_videos[1].id)
    return (new_project, first_video, second_video)
Ejemplo n.º 3
0
def test_add_video(app, es, cds_jsonresolver, users, location):
    """A video created with ``_project_id`` must be linked to its project."""
    login_user(User.query.get(users[0]))

    # start from a committed project that has no videos yet
    project = Project.create({
        'title': {
            'title': 'my project',
        },
        'videos': [],
    }).commit()
    assert project['videos'] == []

    # create a video that points at the project
    video_1 = Video.create({
        'title': {
            'title': 'video 1',
        },
        '_project_id': project['_deposit']['id'],
    })

    # the link must be visible from both sides
    assert project._find_refs([video_1.ref])
    assert video_1.project.id == project.id
Ejemplo n.º 4
0
def test_sync_records_with_deposits(app, db, location, users,
                                    project_deposit_metadata,
                                    video_deposit_metadata):
    """Run the sync task and compare deposit files with record files."""
    # project + video deposit
    project = Project.create(project_deposit_metadata)
    project_deposit_metadata['report_number'] = ['123']
    video_deposit_metadata['_project_id'] = project['_deposit']['id']
    deposit = Video.create(video_deposit_metadata)
    depid = deposit['_deposit']['id']

    # populate the deposit bucket with four objects; the last one is kept
    # in a variable because it is removed further down
    initial_objects = (
        ("obj_1", "mylocation1", "mychecksum1"),
        ("obj_2", "mylocation2", "mychecksum2"),
        ("obj_3", "mylocation3", "mychecksum3"),
    )
    for key, uri, checksum in initial_objects:
        ObjectVersion.create(
            deposit.files.bucket, key).set_location(uri, 1, checksum)
    obj_4 = ObjectVersion.create(
        deposit.files.bucket, "obj_4").set_location(
            "mylocation4", 1, "mychecksum4")

    # publish the deposit
    login_user(User.query.get(users[0]))
    prepare_videos_for_publish([deposit])
    deposit = deposit.publish()
    _, record = deposit.fetch_published()
    assert deposit.is_published() is True

    # change the deposit bucket after publication:
    # - add a brand new key
    ObjectVersion.create(
        deposit.files.bucket, "obj_new").set_location(
            "mylocation_new", 1, "mychecksum")
    # - write the same key a second time
    #   NOTE(review): the original comment said "modify obj_1", but the key
    #   used here is "obj_new" -- confirm which one was intended.
    ObjectVersion.create(
        deposit.files.bucket, "obj_new").set_location(
            "mylocation2.1", 1, "mychecksum2.1")
    # - delete obj_3
    ObjectVersion.delete(deposit.files.bucket, "obj_3")
    # - remove obj_4
    obj_4.remove()

    # state before the sync task runs
    files = ['obj_1', 'obj_2', 'obj_3', 'obj_4']
    edited_files = ['obj_1', 'obj_2', 'obj_3', 'obj_new']
    check_deposit_record_files(deposit, edited_files, record, files)

    # run the sync task for this deposit
    sync_records_with_deposit_files.s(deposit_id=depid).apply_async()

    # reload deposit and record
    deposit = deposit_video_resolver(depid)
    _, record = deposit.fetch_published()
    assert deposit.is_published() is True

    # state after the sync task
    check_deposit_record_files(
        deposit, edited_files, record, edited_files + ['obj_4'])
Ejemplo n.º 5
0
def project(app, deposit_rest, es, cds_jsonresolver, users, location, db):
    """Build a committed project with two videos and return all three."""
    def _metadata(title):
        # every deposit shares the same description, only the title differs
        return {
            'title': {
                'title': title,
            },
            'description': {
                'value': 'in tempor reprehenderit enim eiusmod',
            },
        }

    with app.test_request_context():
        login_user(User.query.get(users[0]))

        # the project is created (and committed) first, without videos
        new_project = Project.create(_metadata('my project')).commit()

        # then the two videos, both referencing the project deposit id
        videos = []
        for title in ('video 1', 'video 2'):
            video_data = _metadata(title)
            video_data['_project_id'] = new_project['_deposit']['id']
            videos.append(Video.create(video_data))

        # persist project first, then the videos in creation order
        new_project.commit()
        for video in videos:
            video.commit()

    db.session.commit()
    sleep(2)
    return (new_project, videos[0], videos[1])
Ejemplo n.º 6
0
def test_project_discard(app, project_published, video_deposit_metadata):
    """Discard restores the title; discard after adding a video conflicts."""
    project, _video_1, _video_2 = project_published

    # a plain edit followed by discard goes back to the published title
    title_before = project['title']['title']
    title_after = 'modified project'
    project = project.edit()
    project['title']['title'] = title_after
    assert project['title']['title'] == title_after
    project = project.discard()
    assert project['title']['title'] == title_before

    # once a video has been added during the edit, discard must be refused
    project = project.edit()
    video_deposit_metadata['_project_id'] = project['_deposit']['id']
    Video.create(video_deposit_metadata)
    with pytest.raises(DiscardConflict):
        project.discard()
Ejemplo n.º 7
0
def test_add_video(api_app, es, cds_jsonresolver, users, location,
                   project_deposit_metadata, video_deposit_metadata):
    """Check license/copyright defaults and the project <-> video link."""
    cern_url = 'http://copyright.web.cern.ch'
    expected_license = [{
        'license': 'CERN',
        'material': '',
        'url': cern_url,
    }]

    project_data = deepcopy(project_deposit_metadata)
    login_user(User.query.get(users[0]))

    # empty project
    project = Project.create(project_data).commit()

    # project defaults: a license is set, no copyright, no videos
    assert project['license'] == expected_license
    assert 'copyright' not in project
    assert project['videos'] == []

    # video metadata without license/copyright so the defaults kick in
    video_data = deepcopy(video_deposit_metadata)
    for field in ('license', 'copyright'):
        if field in video_data:
            del video_data[field]
    video_data['title']['title'] = 'video 1'
    video_data['_project_id'] = project['_deposit']['id']
    video_1 = Video.create(video_data)

    # video defaults: same license, copyright stamped with the current year
    assert video_1['license'] == expected_license
    current_year = str(datetime.date.today().year)
    assert video_1['copyright'] == {
        'holder': 'CERN',
        'year': current_year,
        'url': cern_url,
    }

    # the link must be visible from both sides
    assert project._find_refs([video_1.ref])
    assert video_1.project.id == project.id
Ejemplo n.º 8
0
    def _force_sync_deposit_bucket(record):
        """Replace deposit bucket with a copy of the record bucket.

        The deposit is looked up from ``record.depid.object_uuid``. A
        snapshot of the record bucket is created, unlocked and attached to
        the deposit in place of its current bucket, which is then removed.
        Both ``deposit`` and ``record`` metadata are updated to reference
        the new bucket and committed.

        :param record: Published record whose bucket is the source of truth.
        :returns: Tuple ``(old_deposit_bucket_id, new_bucket_id)``.
        """
        deposit = Video.get_record(record.depid.object_uuid)
        # NOTE(review): the guard below is disabled, so a deposit that is
        # currently in draft state is synced as well -- confirm intended.
        # if deposit['_deposit']['status'] == 'draft':
        #     raise RuntimeError('Deposit in edit mode: {0}'.format(deposit.id))
        deposit_old_bucket = deposit.files.bucket
        # create a copy of record bucket
        new_bucket = record.files.bucket.snapshot()
        # the copy must be writable, so clear the lock flag on it
        new_bucket.locked = False
        db.session.commit()
        # re-point the RecordsBuckets row of the old deposit bucket to the
        # new bucket (`.one()` raises unless exactly one row matches)
        rb = RecordsBuckets.query.filter(
            RecordsBuckets.bucket_id == deposit_old_bucket.id).one()
        rb.bucket = new_bucket
        db.session.add(rb)
        db.session.commit()

        # Put tags correctly pointing to the right object
        master_file = CDSVideosFilesIterator.get_master_video_file(record)
        if master_file:
            # master object as it exists inside the new bucket
            master_deposit_obj = ObjectVersion.get(new_bucket,
                                                   master_file['key'])

            # every head object in the new bucket that has a file and a
            # 'master' tag gets that tag rewritten to the new master version
            for slave in ObjectVersion.query_heads_by_bucket(
                    bucket=new_bucket).join(ObjectVersion.tags).filter(
                        ObjectVersion.file_id.isnot(None),
                        ObjectVersionTag.key == 'master'):
                ObjectVersionTag.create_or_update(
                    slave, 'master', str(master_deposit_obj.version_id))
                db.session.add(slave)
                # committed once per object, inside the loop
                db.session.commit()

        # Delete the old bucket
        deposit_old_bucket.locked = False
        # return value of remove() is intentionally discarded
        _ = deposit_old_bucket.remove()

        # store the new bucket id in both metadata documents and refresh the
        # deposit's `_files` dump from the (now swapped) bucket
        deposit['_buckets']['deposit'] = str(new_bucket.id)
        record['_buckets']['deposit'] = str(new_bucket.id)
        record['_deposit'] = deposit['_deposit']
        deposit['_files'] = deposit.files.dumps()
        deposit.commit()
        record.commit()
        db.session.commit()

        return deposit_old_bucket.id, new_bucket.id
Ejemplo n.º 9
0
def test_project_partial_validation(api_app, db, api_cds_jsonresolver,
                                    deposit_metadata, location,
                                    video_deposit_metadata, users):
    """Creating an incomplete project works; publishing it must not."""
    def _reload(project_id):
        # drop cached ORM state and read the project again
        db.session.expire_all()
        return Project.get_record(project_id)

    video_metadata = deepcopy(video_deposit_metadata)
    # make the project metadata incomplete by dropping a required field
    if 'category' in deposit_metadata:
        del deposit_metadata['category']

    with api_app.test_request_context():
        login_user(User.query.get(users[0]))
        project = Project.create(deposit_metadata)
        video_metadata['_project_id'] = project['_deposit']['id']
        video_1 = Video.create(video_metadata)
        prepare_videos_for_publish([video_1])
        video_1.commit()

        project = _reload(project.id)
        assert project is not None
        # publishing runs full validation and therefore has to fail
        with pytest.raises(ValidationError):
            project.publish()

        # patch: add the missing field
        project = _reload(project.id)
        project.patch([{
            'op': 'add',
            'path': '/category',
            'value': 'bar',
        }]).commit()

        # update: overwrite the field from a modified copy
        modified = deepcopy(project)
        modified['category'] = 'qwerty'
        project = _reload(project.id)
        project.update(modified)
        # committing must not raise a validation error
        project.commit()
Ejemplo n.º 10
0
def test_subformat_creation_if_missing(api_app, location, datadir, es, users):
    """A subformat missing from the deposit is recreated by the loader."""
    # --- migrate the video with all external work patched out ---
    migration_streams = get_migration_streams(datadir=datadir)
    video_dump = CDSRecordDump(
        data=load_json(datadir, 'cds_records_demo_1_video.json')[0])
    with mock.patch.object(DataCiteProvider, 'register'), \
            mock.patch.object(CDSRecordDumpLoader, '_create_frame',
                              side_effect=get_frames), \
            mock.patch.object(ExtractFramesTask, '_create_gif'), \
            mock.patch.object(CDSRecordDumpLoader, '_clean_file_list'), \
            mock.patch.object(
                CDSRecordDumpLoader, '_get_migration_file_stream_and_size',
                side_effect=migration_streams):
        video = CDSRecordDumpLoader.create(dump=dump_or_none(video_dump))
    db.session.commit()

    with mock.patch.object(TranscodeVideoTask, 'run') as mock_transcode:
        deposit = deposit_video_resolver(video['_deposit']['id'])
        deposit_id = deposit.id
        # drop the first subformat of the first file: 4 are left
        del deposit['_files'][0]['subformat'][0]
        assert len(deposit['_files'][0]['subformat']) == 4
        # ask the loader to recreate whatever is missing
        CDSRecordDumpLoader._create_missing_subformats(
            record=video, deposit=deposit)
        db.session.commit()
        # both deposit and record must be back to 5 subformats
        deposit = Video.get_record(deposit_id)
        rec_video = record_resolver.resolve(video['recid'])[1]
        assert len(deposit['_files'][0]['subformat']) == 5
        assert len(rec_video['_files'][0]['subformat']) == 5
        # exactly one transcode call, for the 240p preset
        assert mock_transcode.called is True
        [(_positional, transcode_kwargs)] = mock_transcode.call_args_list
        assert transcode_kwargs == {'preset_quality': '240p'}
Ejemplo n.º 11
0
def test_drupal_serializer(video_record_metadata, deposit_metadata):
    """Serialize a video for Drupal with and without a publication date."""
    duration = '00:01:00.140'
    report_number = 'RN-01'

    def _assert_serialized(expected_entry):
        # serialize the current metadata and compare only the expected keys
        entry = VideoDrupal(
            video_record_metadata).format()['entries'][0]['entry']
        subset = {key: entry[key] for key in entry if key in expected_entry}
        assert subset == expected_entry

    video_record_metadata.update(deposit_metadata)
    video_record_metadata.update({
        'report_number': [report_number],
        '$schema': Video.get_record_schema(),
        'duration': duration,
        'contributors': [
            {'name': 'paperone', 'role': 'Director'},
            {'name': 'topolino', 'role': 'Music by'},
            {'name': 'nonna papera', 'role': 'Producer'},
            {'name': 'pluto', 'role': 'Director'},
            {'name': 'zio paperino', 'role': 'Producer'},
        ],
    })
    expected = {
        'caption_en': 'in tempor reprehenderit enim eiusmod',
        'caption_fr': 'france caption',
        'copyright_date': '2017',
        'copyright_holder': 'CERN',
        'creation_date': '2017-03-02',
        'directors': 'paperone, pluto',
        'entry_date': '2017-09-25',
        'id': report_number,
        'keywords': 'keyword1, keyword2',
        'license_body': 'GPLv2',
        'license_url': 'http://license.cern.ch',
        'producer': 'nonna papera, zio paperino',
        'record_id': '1',
        'title_en': 'My english title',
        'title_fr': 'My french title',
        'type': 'video',
        'video_length': duration,
    }

    # with a publication date present
    _assert_serialized(expected)

    # without a publication date the creation date comes out empty
    del video_record_metadata['publication_date']
    expected['creation_date'] = ''
    _assert_serialized(expected)
Ejemplo n.º 12
0
def test_video_record_schema(app, db, api_project):
    """The schema exposed by a video instance matches the class-level one."""
    _project, video_1, _video_2 = api_project
    expected_schema = Video.get_record_schema()
    assert video_1.record_schema == expected_schema
Ejemplo n.º 13
0
def test_migrate_record(frames_required, api_app, location, datadir, es,
                        users):
    """Migrate a project dump and a video dump, then verify the result.

    The test loads two JSON dumps from ``datadir`` through
    ``CDSRecordDumpLoader.create``, checks the metadata of the resulting
    records and deposits, inspects files/buckets/pids through the local
    ``check_*`` helpers, and finally edits and re-publishes the video
    deposit.
    """
    # [[ migrate the project ]]
    data = load_json(datadir, 'cds_records_demo_1_project.json')
    dump = CDSRecordDump(data=data[0])
    project = CDSRecordDumpLoader.create(dump=dump)
    # keep the id so the project can be re-read after the video migration
    p_id = project.id

    assert project['$schema'] == Project.get_record_schema()
    assert project['publication_date'] == '2016-01-05'
    assert 'license' not in project
    assert 'copyright' not in project
    assert project['_cds'] == {
        "state": {
            "file_transcode": "SUCCESS",
            "file_video_extract_frames": "SUCCESS",
            "file_video_metadata_extraction": "SUCCESS"
        },
        'modified_by': users[0],
    }

    # check project deposit
    # at this point exactly one 'depid' PID is expected (`.one()`)
    deposit_project_uuid = PersistentIdentifier.query.filter_by(
        pid_type='depid', object_type='rec').one().object_uuid
    deposit_project = Record.get_record(deposit_project_uuid)
    assert Project._schema in deposit_project['$schema']
    assert project.revision_id == deposit_project[
        '_deposit']['pid']['revision_id']
    # NOTE(review): the owner id is hard-coded to 1 here, whereas the video
    # deposit checks further down compare against users[0] -- presumably the
    # same value; confirm.
    assert deposit_project['_deposit']['created_by'] == 1
    assert deposit_project['_deposit']['owners'] == [1]
    assert deposit_project['_files'] == []

    # [[ migrate the video ]]
    data = load_json(datadir, 'cds_records_demo_1_video.json')
    dump = CDSRecordDump(data=data[0])
    db.session.commit()

    def check_symlinks(video):
        """Assert the single listed file of ``video`` has a symlink path."""
        symlinks_creator = SymlinksCreator()
        files = list(symlinks_creator._get_list_files(record=video))
        assert len(files) == 1
        for file_ in files:
            path = symlinks_creator._build_link_path(
                symlinks_creator._symlinks_location, video, file_['key'])
            # lexists: true even when the link target itself is missing
            assert os.path.lexists(path)

    def check_gif(video, mock_gif):
        """Assert the arguments of the first recorded ``_create_gif`` call."""
        # called only once for deposit
        # third element of a mock call entry holds the keyword arguments
        (_, _, mock_args) = mock_gif.mock_calls[0]
        # check gif record
        video = CDSRecord(dict(video), video.model)
        # check gif deposit
        deposit = deposit_video_resolver(video['_deposit']['id'])
        master_video = CDSVideosFilesIterator.get_master_video_file(deposit)
        assert mock_args['master_id'] == master_video['version_id']
        assert str(deposit.files.bucket.id) == mock_args['bucket']
        #  assert mock_args['bucket'].id == deposit.files.bucket.id
        assert len(mock_args['frames']) == 10
        assert 'output_dir' in mock_args

    migration_streams = get_migration_streams(datadir=datadir)
    # patch out DOI registration, frame/gif creation, file streaming and the
    # file-list cleanup while the video dump is loaded
    with mock.patch.object(DataCiteProvider, 'register'), \
            mock.patch.object(CDSRecordDumpLoader, '_create_frame',
                              side_effect=get_frames), \
            mock.patch.object(CDSRecordDumpLoader, '_get_minimum_frames',
                              return_value=frames_required) as mock_frames, \
            mock.patch.object(
                ExtractFramesTask, '_create_gif') as mock_gif, \
            mock.patch.object(
                CDSRecordDumpLoader, '_get_migration_file_stream_and_size',
                side_effect=migration_streams), \
            mock.patch.object(CDSRecordDumpLoader, '_clean_file_list'):
        video = CDSRecordDumpLoader.create(dump=dump)
        assert mock_frames.called is True
    db.session.add(video.model)
    video_id = video.id
    # check smil file
    smil_obj = ObjectVersion.query.filter_by(
        key='CERN-MOVIE-2012-193-001.smil', is_head=True).one()
    storage = smil_obj.file.storage()
    assert '<video src' in storage.open().read().decode('utf-8')
    # check video symlinks
    check_symlinks(video)
    # check gif
    check_gif(video, mock_gif)
    # check project
    project = Record.get_record(p_id)
    assert project['videos'] == [
        {'$ref': 'https://cds.cern.ch/api/record/1495143'}
    ]
    assert video['$schema'] == Video.get_record_schema()
    assert video['date'] == '2012-11-21'  # metadata data
    assert video['publication_date'] == '2017-07-13'  # creation date (DB)
    assert video['_project_id'] == '2093596'
    assert video['license'] == [{
        'license': 'CERN',
        'url': 'http://copyright.web.cern.ch',
    }]
    assert video['copyright'] == {
        'holder': 'CERN',
        'year': '2012',
        'url': 'http://copyright.web.cern.ch',
    }
    assert video['description'] == ''
    assert 'doi' in video
    assert video['_cds']['state'] == {
        "file_transcode": "SUCCESS",
        "file_video_extract_frames": "SUCCESS",
        "file_video_metadata_extraction": "SUCCESS"
    }
    assert 'extracted_metadata' in video['_cds']

    def check_files(video):
        """Assert every dumped object of the bucket has the expected keys."""
        bucket = CDSRecordDumpLoader._get_bucket(record=video)
        files = [dump_object(obj)
                 for obj in ObjectVersion.get_by_bucket(bucket=bucket)]
        for file_ in files:
            assert as_bucket(file_['bucket_id']) is not None
            assert 'checksum' in file_
            assert 'content_type' in file_
            assert 'context_type' in file_
            # NOTE(review): this compares the query object itself with None,
            # which presumably always holds; `.first()` may have been
            # intended -- confirm.
            assert FileInstance.query.filter_by(
                id=file_['file_id']) is not None
            assert 'key' in file_
            assert 'links' in file_
            # (content_type / context_type are asserted a second time here)
            assert 'content_type' in file_
            assert 'context_type' in file_
            assert 'media_type' in file_
            assert 'tags' in file_

        # check extracted metadata
        # `any`: at least one known metadata key must be present in each
        master_video = CDSVideosFilesIterator.get_master_video_file(video)
        assert any([key in master_video['tags']
                    for key in ExtractMetadataTask._all_keys])
        assert any([key in video['_cds']['extracted_metadata']
                    for key in ExtractMetadataTask._all_keys])

    def check_buckets(record, deposit):
        """Compare the bucket contents of ``record`` and ``deposit``."""
        def get(key, record):
            # list of `key` values over all dumped objects of the bucket
            bucket = CDSRecordDumpLoader._get_bucket(record=record)
            files = [dump_object(obj)
                     for obj in ObjectVersion.get_by_bucket(bucket=bucket)]
            return [file_[key] for file_ in files]

        def check(record, deposit, file_key, different=None):
            # number of distinct values present in the record bucket but
            # not in the deposit bucket must equal `different`
            values_record = set(get(file_key, record))
            values_deposit = set(get(file_key, deposit))
            difference = len(values_record - values_deposit)
            assert different == difference

        def check_tag_master(record):
            # every object carrying a 'master' tag must point at the
            # version id of the master video file
            bucket = CDSRecordDumpLoader._get_bucket(record=record)
            master = CDSVideosFilesIterator.get_master_video_file(record)
            files = [dump_object(obj)
                     for obj in ObjectVersion.get_by_bucket(bucket=bucket)
                     if obj.get_tags().get('master')]
            assert all([file_['tags']['master'] == master['version_id']
                        for file_ in files])

        # 1 bucket record != 1 bucket deposit
        check(record, deposit, 'bucket_id', 1)
        # all file_id are the same except the smil file (only in record)
        check(record, deposit, 'file_id', 1)
        check(record, deposit, 'key', 1)
        # 18 object_version record != 17 object_version deposit
        check(record, deposit, 'version_id', 18)
        # check tag 'master' where is pointing
        check_tag_master(record)
        check_tag_master(deposit)

    def check_first_level_files(record):
        """Assert subformats/frames of the master file and their tags."""
        # NOTE(review): `_files` is read from `deposit_video` in the
        # enclosing scope, not from the `record` argument; only the duration
        # below comes from `record` -- confirm this is intended.
        [master] = [file_ for file_ in deposit_video['_files']
                    if file_['context_type'] == 'master']
        assert len(master['subformat']) == 5
        assert len(master['frame']) == 10
        # TODO assert len(master['playlist']) == ??
        assert len([file_ for file_ in deposit_video['_files']
                    if file_['context_type'] == 'master']) == 1
        duration = float(record['_cds']['extracted_metadata']['duration'])
        # every frame timestamp must lie strictly inside (0, duration)
        for frame in master['frame']:
            assert float(frame['tags']['timestamp']) < duration
            assert float(frame['tags']['timestamp']) > 0
        # check tag 'preset_quality'
        pqs = [form['tags']['preset_quality'] for form in master['subformat']]
        assert sorted(pqs) == sorted(['1080p', '240p', '360p', '480p', '720p'])
        # check tag 'display_aspect_ratio'
        dar = set([form['tags']['display_aspect_ratio']
                   for form in master['subformat']])
        assert dar == {'16:9'}

    def check_pids(record):
        """Check pids."""
        assert record['report_number'][0] == 'CERN-VIDEO-2012-193-001'
        assert PersistentIdentifier.query.filter_by(
            pid_value='CERN-VIDEO-2012-193-001').count() == 1
        assert PersistentIdentifier.query.filter_by(
            pid_value='CERN-MOVIE-2012-193-001').count() == 1

    db.session.commit()

    # check video deposit
    # the only 'depid' PID that is not the project deposit is the video one
    deposit_video_uuid = PersistentIdentifier.query.filter(
        PersistentIdentifier.pid_type == 'depid',
        PersistentIdentifier.object_uuid != str(deposit_project_uuid),
        PersistentIdentifier.object_type == 'rec'
    ).one().object_uuid
    deposit_video = Video.get_record(str(deposit_video_uuid))
    assert Video._schema in deposit_video['$schema']
    # re-read the video record by the id saved right after migration
    video = Record.get_record(video_id)
    assert video.revision_id == deposit_video[
        '_deposit']['pid']['revision_id']
    assert deposit_video['_deposit']['created_by'] == users[0]
    assert deposit_video['_deposit']['owners'] == [users[0]]
    assert deposit_video['_project_id'] == '2093596'
    assert len(video['_files']) == 2
    assert len(deposit_video['_files']) == 2
    check_files(video)
    check_files(deposit_video)
    check_buckets(video, deposit_video)
    check_first_level_files(video)
    check_first_level_files(deposit_video)
    check_pids(video)

    # try to edit video
    deposit_video = deposit_video_resolver(deposit_video['_deposit']['id'])
    deposit_video = deposit_video.edit()

    # try to edit project
    deposit_project = deposit_project_resolver(
        deposit_project['_deposit']['id'])
    deposit_project = deposit_project.edit()

    # log in, change the title and publish the video again
    login_user(User.query.filter_by(id=users[0]).first())
    deposit_video['title']['title'] = 'test'
    deposit_video = deposit_video.publish()
    _, record_video = deposit_video.fetch_published()
    assert record_video['title']['title'] == 'test'
Ejemplo n.º 14
0
 def get_video_record(depid):
     """Return the published video record for the deposit id ``depid``."""
     published = deposit_video_resolver(depid).fetch_published()[1]
     return Video.get_record(published.id)
Ejemplo n.º 15
0
def test_migrate_record(app, location, datadir, es):
    """Migrate a project dump and a video dump, then verify the result.

    Both dumps are loaded from JSON files in ``datadir`` through
    ``CDSRecordDumpLoader.create``. The test checks the metadata of the
    resulting records and deposits, inspects files and buckets through the
    local ``check_*`` helpers, and finally edits and re-publishes the video
    deposit.
    """
    # create the project
    data = load_json(datadir, 'cds_records_demo_1_project.json')
    dump = CDSRecordDump(data=data[0])
    project = CDSRecordDumpLoader.create(dump=dump)
    # keep the id so the project can be re-read after the video migration
    p_id = project.id

    date = '2015-11-13'
    assert project['$schema'] == Project.get_record_schema()
    assert project['date'] == date
    assert project['publication_date'] == date
    assert 'license' not in project
    assert 'copyright' not in project
    assert project['_cds'] == {
        "state": {
            "file_transcode": "SUCCESS",
            "file_video_extract_frames": "SUCCESS",
            "file_video_metadata_extraction": "SUCCESS"
        },
        'modified_by': None,
    }

    # check project deposit
    # at this point exactly one 'depid' PID is expected (`.one()`)
    deposit_project_uuid = PersistentIdentifier.query.filter_by(
        pid_type='depid', object_type='rec').one().object_uuid
    deposit_project = Record.get_record(deposit_project_uuid)
    assert Project._schema in deposit_project['$schema']
    assert project.revision_id == deposit_project[
        '_deposit']['pid']['revision_id']
    assert deposit_project['_deposit']['created_by'] == -1
    assert deposit_project['_deposit']['owners'] == [-1]
    assert deposit_project['_files'] == []

    # create the video
    data = load_json(datadir, 'cds_records_demo_1_video.json')
    dump = CDSRecordDump(data=data[0])

    # The test video is opened as part of the `with` statement so the file
    # handle is closed once the migration call returns, instead of being
    # leaked for the rest of the test run.
    with open(join(datadir, 'test.mp4'), 'rb') as video_stream, \
            mock.patch.object(DataCiteProvider, 'register'), \
            mock.patch.object(
                CDSRecordDumpLoader, '_get_migration_file_stream',
                return_value=video_stream):
        video = CDSRecordDumpLoader.create(dump=dump)
    project = Record.get_record(p_id)
    assert project['videos'] == [
        {'$ref': 'https://cds.cern.ch/api/record/1495143'}
    ]
    assert video['$schema'] == Video.get_record_schema()
    date = '2012-11-20'
    assert video['date'] == date
    assert video['publication_date'] == date
    assert video['_project_id'] == '2093596'
    assert video['license'] == [{
        'license': 'CERN',
        'url': 'http://copyright.web.cern.ch',
    }]
    assert video['copyright'] == {
        'holder': 'CERN',
        'year': '2012',
        'url': 'http://copyright.web.cern.ch',
    }
    assert video['description'] == ''
    assert 'doi' in video
    assert video['_cds']['state'] == {
        "file_transcode": "SUCCESS",
        "file_video_extract_frames": "SUCCESS",
        "file_video_metadata_extraction": "SUCCESS"
    }
    assert 'extracted_metadata' in video['_cds']

    def check_files(video):
        """Assert every dumped object of the bucket has the expected keys."""
        bucket = CDSRecordDumpLoader._get_bucket(record=video)
        files = [dump_object(obj)
                 for obj in ObjectVersion.get_by_bucket(bucket=bucket)]
        for file_ in files:
            assert as_bucket(file_['bucket_id']) is not None
            assert 'checksum' in file_
            assert 'content_type' in file_
            assert 'context_type' in file_
            # NOTE(review): this compares the query object itself with None,
            # which presumably always holds; `.first()` may have been
            # intended -- confirm before tightening.
            assert FileInstance.query.filter_by(
                id=file_['file_id']) is not None
            assert 'key' in file_
            assert 'links' in file_
            assert 'content_type' in file_
            assert 'context_type' in file_
            assert 'media_type' in file_
            assert 'tags' in file_

        # check extracted metadata
        # `any`: at least one known metadata key must be present in each
        master_video = CDSVideosFilesIterator.get_master_video_file(video)
        assert any([key in master_video['tags']
                    for key in ExtractMetadataTask._all_keys])
        assert any([key in video['_cds']['extracted_metadata']
                    for key in ExtractMetadataTask._all_keys])

    def check_buckets(record, deposit):
        """Compare the bucket contents of ``record`` and ``deposit``."""
        def get(key, record):
            # list of `key` values over all dumped objects of the bucket
            bucket = CDSRecordDumpLoader._get_bucket(record=record)
            files = [dump_object(obj)
                     for obj in ObjectVersion.get_by_bucket(bucket=bucket)]
            return [file_[key] for file_ in files]

        def check(record, deposit, file_key, different=None):
            # number of distinct values present in the record bucket but
            # not in the deposit bucket must equal `different`
            values_record = set(get(file_key, record))
            values_deposit = set(get(file_key, deposit))
            difference = len(values_record - values_deposit)
            assert different == difference

        def check_tag_master(record):
            # every object carrying a 'master' tag must point at the
            # version id of the master video file
            bucket = CDSRecordDumpLoader._get_bucket(record=record)
            master = CDSVideosFilesIterator.get_master_video_file(record)
            files = [dump_object(obj)
                     for obj in ObjectVersion.get_by_bucket(bucket=bucket)
                     if obj.get_tags().get('master')]
            assert all([file_['tags']['master'] == master['version_id']
                        for file_ in files])

        # 1 bucket record != 1 bucket deposit
        check(record, deposit, 'bucket_id', 1)
        # all file_id are the same except the smil file (only in record)
        check(record, deposit, 'file_id', 1)
        check(record, deposit, 'key', 1)
        # 18 object_version record != 17 object_version deposit
        check(record, deposit, 'version_id', 18)
        # check tag 'master' where is pointing
        check_tag_master(record)
        check_tag_master(deposit)

    def check_first_level_files(record):
        """Assert subformats/frames of the master file and frame timestamps."""
        # NOTE(review): `_files` is read from `deposit_video` in the
        # enclosing scope, not from the `record` argument; only the duration
        # below comes from `record` -- confirm this is intended.
        [master] = [file_ for file_ in deposit_video['_files']
                    if file_['context_type'] == 'master']
        assert len(master['subformat']) == 5
        assert len(master['frame']) == 10
        # TODO assert len(master['playlist']) == ??
        assert len([file_ for file_ in deposit_video['_files']
                    if file_['context_type'] == 'master']) == 1
        duration = float(record['_cds']['extracted_metadata']['duration'])
        # every frame timestamp must lie strictly inside (0, duration)
        for frame in master['frame']:
            assert float(frame['tags']['timestamp']) < duration
            assert float(frame['tags']['timestamp']) > 0

    # check video deposit
    # the only 'depid' PID that is not the project deposit is the video one
    deposit_video_uuid = PersistentIdentifier.query.filter(
        PersistentIdentifier.pid_type == 'depid',
        PersistentIdentifier.object_uuid != str(deposit_project_uuid),
        PersistentIdentifier.object_type == 'rec'
    ).one().object_uuid
    deposit_video = Video.get_record(str(deposit_video_uuid))
    assert Video._schema in deposit_video['$schema']
    assert video.revision_id == deposit_video[
        '_deposit']['pid']['revision_id']
    assert deposit_video['_deposit']['created_by'] == -1
    assert deposit_video['_deposit']['owners'] == [-1]
    assert len(video['_files']) == 2
    assert len(deposit_video['_files']) == 2
    check_files(video)
    check_files(deposit_video)
    check_buckets(video, deposit_video)
    check_first_level_files(video)
    check_first_level_files(deposit_video)

    # try to edit video
    deposit_video = deposit_video_resolver(deposit_video['_deposit']['id'])
    deposit_video = deposit_video.edit()

    # try to edit project
    deposit_project = deposit_project_resolver(
        deposit_project['_deposit']['id'])
    deposit_project = deposit_project.edit()

    # try to publish again the video
    deposit_video['title']['title'] = 'test'
    deposit_video = deposit_video.publish()
    _, record_video = deposit_video.fetch_published()
    assert record_video['title']['title'] == 'test'