Example #1
def test_autoadd_explicit_newversion(
        app, db, users, communities, deposit, deposit_file,
        communities_autoadd_enabled):
    """Explicitly the autoadded communities in a new version."""
    deposit_v1 = publish_and_expunge(db, deposit)
    recid_v1, record_v1 = deposit_v1.fetch_published()
    depid_v1_value = deposit_v1['_deposit']['id']
    recid_v1_value = recid_v1.pid_value

    deposit_v1 = deposit_v1.newversion()
    pv = PIDVersioning(child=recid_v1)
    depid_v2 = pv.draft_child_deposit
    depid_v2_value = depid_v2.pid_value

    deposit_v2 = ZenodoDeposit.get_record(depid_v2.get_assigned_object())

    deposit_v2['communities'] = ['ecfunded', 'grants_comm', 'zenodo']
    deposit_v2['grants'] = [{'title': 'SomeGrant'}, ]
    deposit_v2.files['file.txt'] = BytesIO(b('file1'))
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    recid_v2, record_v2 = deposit_v2.fetch_published()

    depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)
    depid_v2, deposit_v2 = deposit_resolver.resolve(depid_v2_value)
    recid_v1, record_v1 = record_resolver.resolve(recid_v1_value)
    assert record_v1.get('communities', []) == ['grants_comm', ]
    assert deposit_v1.get('communities', []) == ['ecfunded', 'grants_comm',
                                                 'zenodo']
    assert record_v2.get('communities', []) == ['grants_comm', ]
    assert deposit_v2.get('communities', []) == ['ecfunded', 'grants_comm',
                                                 'zenodo']
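Every example here relies on the publish_and_expunge(db, deposit) helper. It is presumably the module-level counterpart of the _publish_and_expunge helper shown in Examples #9 and #10; a minimal sketch under that assumption (the Deposit import is illustrative):

from invenio_deposit.api import Deposit  # assumption: the generic Deposit API


def publish_and_expunge(db, deposit):
    """Publish the deposit and reload it so the session is synced with the DB."""
    deposit.publish()
    dep_uuid = deposit.id
    db.session.commit()
    db.session.expunge_all()
    return Deposit.get_record(dep_uuid)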
Example #2
def test_propagation_with_newversion_open(
        app, db, users, communities, deposit, deposit_file):
    """Adding old versions to a community should propagate to all drafts."""
    # deposit['communities'] = ['c1', 'c2']
    deposit_v1 = publish_and_expunge(db, deposit)
    deposit_v1 = deposit_v1.edit()

    recid_v1, record_v1 = deposit_v1.fetch_published()
    recid_v1_value = recid_v1.pid_value

    deposit_v1 = deposit_v1.newversion()
    pv = PIDVersioning(child=recid_v1)
    depid_v2 = pv.draft_child_deposit
    depid_v2_value = depid_v2.pid_value

    # New version in 'deposit_v2' has not been published yet
    deposit_v2 = ZenodoDeposit.get_record(depid_v2.get_assigned_object())

    # depid_v1_value = deposit_v1['_deposit']['id']
    # depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)
    deposit_v1['communities'] = ['c1', 'c2', ]
    deposit_v1 = publish_and_expunge(db, deposit_v1)

    recid_v1, record_v1 = record_resolver.resolve(recid_v1_value)
    c1_api = ZenodoCommunity('c1')
    c1_api.accept_record(record_v1, pid=recid_v1)

    depid_v2, deposit_v2 = deposit_resolver.resolve(depid_v2_value)
    assert deposit_v2['communities'] == ['c1', 'c2']
    deposit_v2.files['file.txt'] = BytesIO(b('file1'))
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    recid_v2, record_v2 = deposit_v2.fetch_published()

    assert record_v2['communities'] == ['c1', ]
Example #3
def versioning_published_record(uuid):
    """Migrate a published record."""
    record = ZenodoRecord.get_record(uuid)
    if 'conceptrecid' in record:
        return
    # ASSERT ZENODO DOI ONLY!
    assert 'conceptrecid' not in record, "Record already migrated"
    # doi = PersistentIdentifier.get('doi', str(record['doi']))
    # assert is_local_doi(doi.pid_value), 'DOI is not controlled by Zenodo.'
    conceptrecid = zenodo_concept_recid_minter(uuid, record)
    conceptrecid.register()
    recid = PersistentIdentifier.get('recid', str(record['recid']))
    pv = PIDVersioning(parent=conceptrecid)
    pv.insert_child(recid)
    record.commit()
    # Some old records have no deposit ID, some don't have '_deposit'
    if ('_deposit' in record and 'id' in record['_deposit']
            and record['_deposit']['id']):
        try:
            depid = PersistentIdentifier.get('depid',
                                             str(record['_deposit']['id']))
            deposit = ZenodoDeposit.get_record(depid.object_uuid)
            deposit['conceptrecid'] = conceptrecid.pid_value
            if deposit['_deposit']['status'] == 'draft':
                deposit['_deposit']['pid']['revision_id'] = \
                    deposit['_deposit']['pid']['revision_id'] + 1
            deposit.commit()
        except PIDDoesNotExistError:
            pass
    db.session.commit()
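The driver for this migration is not part of the example; a minimal sketch, under the assumption that it simply walks every registered recid PID (the query below is illustrative, and already-migrated records return early thanks to the 'conceptrecid' guard above):

from invenio_pidstore.models import PersistentIdentifier, PIDStatus


def versioning_all_published_records():
    # Hypothetical driver: migrate the record behind every registered recid.
    recids = PersistentIdentifier.query.filter_by(
        pid_type='recid', status=PIDStatus.REGISTERED)
    for recid in recids:
        if recid.object_uuid:
            versioning_published_record(recid.object_uuid)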
Example #4
def test_autoadd_explicit_newversion(
        app, db, users, communities, deposit, deposit_file,
        communities_autoadd_enabled):
    """Explicitly the autoadded communities in a new version."""
    deposit_v1 = publish_and_expunge(db, deposit)
    recid_v1, record_v1 = deposit_v1.fetch_published()
    depid_v1_value = deposit_v1['_deposit']['id']
    recid_v1_value = recid_v1.pid_value

    deposit_v1 = deposit_v1.newversion()
    pv = PIDVersioning(child=recid_v1)
    depid_v2 = pv.draft_child_deposit
    depid_v2_value = depid_v2.pid_value

    deposit_v2 = ZenodoDeposit.get_record(depid_v2.get_assigned_object())

    deposit_v2['communities'] = ['ecfunded', 'grants_comm', 'zenodo']
    deposit_v2['grants'] = [{'title': 'SomeGrant'}, ]
    deposit_v2.files['file.txt'] = BytesIO(b('file1'))
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    recid_v2, record_v2 = deposit_v2.fetch_published()

    depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)
    depid_v2, deposit_v2 = deposit_resolver.resolve(depid_v2_value)
    recid_v1, record_v1 = record_resolver.resolve(recid_v1_value)
    assert record_v1.get('communities', []) == ['grants_comm', ]
    assert deposit_v1.get('communities', []) == ['ecfunded', 'grants_comm',
                                                 'zenodo']
    assert record_v2.get('communities', []) == ['grants_comm', ]
    assert deposit_v2.get('communities', []) == ['ecfunded', 'grants_comm',
                                                 'zenodo']
Example #5
def test_propagation_with_newversion_open(
        app, db, users, communities, deposit, deposit_file):
    """Adding old versions to a community should propagate to all drafts."""
    # deposit['communities'] = ['c1', 'c2']
    deposit_v1 = publish_and_expunge(db, deposit)
    deposit_v1 = deposit_v1.edit()

    recid_v1, record_v1 = deposit_v1.fetch_published()
    recid_v1_value = recid_v1.pid_value

    deposit_v1 = deposit_v1.newversion()
    pv = PIDVersioning(child=recid_v1)
    depid_v2 = pv.draft_child_deposit
    depid_v2_value = depid_v2.pid_value

    # New version in 'deposit_v2' has not been published yet
    deposit_v2 = ZenodoDeposit.get_record(depid_v2.get_assigned_object())

    # depid_v1_value = deposit_v1['_deposit']['id']
    # depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)
    deposit_v1['communities'] = ['c1', 'c2', ]
    deposit_v1 = publish_and_expunge(db, deposit_v1)

    recid_v1, record_v1 = record_resolver.resolve(recid_v1_value)
    c1_api = ZenodoCommunity('c1')
    c1_api.accept_record(record_v1, pid=recid_v1)

    depid_v2, deposit_v2 = deposit_resolver.resolve(depid_v2_value)
    assert deposit_v2['communities'] == ['c1', 'c2']
    deposit_v2.files['file.txt'] = BytesIO(b('file1'))
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    recid_v2, record_v2 = deposit_v2.fetch_published()

    assert record_v2['communities'] == ['c1', ]
Example #6
def test_basic_api(app, db, communities, deposit, deposit_file):
    """Test basic workflow using Deposit and Communities API."""
    deposit_v1 = publish_and_expunge(db, deposit)
    depid_v1_value = deposit_v1['_deposit']['id']

    recid_v1, record_v1 = deposit_v1.fetch_published()
    recid_v1_value = recid_v1.pid_value

    deposit_v1.newversion()
    pv = PIDVersioning(child=recid_v1)
    depid_v2 = pv.draft_child_deposit
    deposit_v2 = ZenodoDeposit.get_record(depid_v2.get_assigned_object())
    deposit_v2.files['file.txt'] = BytesIO(b('file1'))
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    deposit_v2 = deposit_v2.edit()
    # 1. Request for 'c1' and 'c2' through deposit v2
    deposit_v2['communities'] = [
        'c1',
        'c2',
    ]
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    recid_v2, record_v2 = deposit_v2.fetch_published()
    recid_v2_value = recid_v2.pid_value
    depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)
    recid_v1, record_v1 = deposit_v1.fetch_published()
    assert record_v1.get('communities', []) == []
    assert record_v2.get('communities', []) == []

    c1_api = ZenodoCommunity('c1')
    c2_api = ZenodoCommunity('c2')

    # Inclusion requests should be visible for both records
    assert c1_api.get_comm_irs(record_v1, pid=recid_v1).count() == 1
    assert c1_api.get_comm_irs(record_v2, pid=recid_v2).count() == 1

    assert c2_api.get_comm_irs(record_v1, pid=recid_v1).count() == 1
    assert c2_api.get_comm_irs(record_v2, pid=recid_v2).count() == 1

    # Accept to 'c1' through record_v2 (as originally requested),
    # and 'c2' through record_v1 (version)
    c1_api.accept_record(record_v2, pid=recid_v2)
    c2_api.accept_record(record_v1, pid=recid_v1)
    recid_v1, record_v1 = record_resolver.resolve(recid_v1_value)
    recid_v2, record_v2 = record_resolver.resolve(recid_v2_value)
    # Accepting an individual record into a community should propagate the
    # changes to all versions
    assert record_v1['communities'] == record_v2['communities'] == \
        ['c1', 'c2', ]

    # Removing the communities from deposit_v1 should remove them from both
    # published records
    depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)
    deposit_v1 = deposit_v1.edit()
    deposit_v1['communities'] = []
    deposit_v1 = publish_and_expunge(db, deposit_v1)
    recid_v1, record_v1 = record_resolver.resolve(recid_v1_value)
    recid_v2, record_v2 = record_resolver.resolve(recid_v2_value)
    assert record_v1.get('communities', []) == []
    assert record_v2.get('communities', []) == []
Example #7
def test_record_delete_v2(mocker, app, db, users, deposit, deposit_file):
    """Delete a record (only last version) with multiple versions."""
    dc_mock = mocker.patch(
        'invenio_pidstore.providers.datacite.DataCiteMDSClient')
    deposit_v1 = publish_and_expunge(db, deposit)
    recid_v1, record_v1 = deposit.fetch_published()
    recid_v1_value = recid_v1.pid_value
    deposit_v1.newversion()
    recid_v1, record_v1 = record_resolver.resolve(recid_v1_value)

    # Stash a copy of v1 for later
    rec1 = deepcopy(record_v1)

    pv = PIDVersioning(child=recid_v1)
    depid_v2 = pv.draft_child_deposit
    deposit_v2 = ZenodoDeposit.get_record(depid_v2.get_assigned_object())
    deposit_v2.files['file.txt'] = BytesIO(b('file1'))
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    recid_v2, record_v2 = deposit_v2.fetch_published()

    # Stash a copy of v2 for later
    rec2 = deepcopy(record_v2)
    rec2_id = str(record_v2.id)

    assert dc_mock().metadata_delete.call_count == 0

    # Remove the second (latest) version
    delete_record(rec2_id, 'spam', users[0]['id'])

    # Make sure all of the v2 PIDs are deleted
    assert PID.get('doi', rec2['doi']).status == PIDStatus.DELETED
    assert PID.get('recid', rec2['recid']).status == PIDStatus.DELETED
    assert PID.get('depid', rec2['_deposit']['id']).status == PIDStatus.DELETED

    # Concept DOI should be left registered
    assert PID.get('doi', rec2['conceptdoi']).status == PIDStatus.REGISTERED

    # Make sure conceptrecid is redirecting to v1
    crecid = PID.get('recid', rec2['conceptrecid'])
    assert crecid.status == PIDStatus.REDIRECTED
    assert crecid.get_redirect() == PID.get('recid', rec1['recid'])

    # Make sure the v1 PIDs are kept intact
    assert PID.get('oai', rec1['_oai']['id']).status == PIDStatus.REGISTERED
    assert PID.get('doi', rec1['doi']).status == PIDStatus.REGISTERED
    assert PID.get('recid', rec1['recid']).status == PIDStatus.REGISTERED
    assert PID.get('depid', rec1['_deposit']['id']).status == \
        PIDStatus.REGISTERED

    # Only the v2 DOI should be deleted
    assert dc_mock().doi_post.call_count == 2
    assert dc_mock().doi_post.has_any_call('10.5072/zenodo.2')
    assert dc_mock().doi_post.has_any_call('10.5072/zenodo.1')
    assert dc_mock().metadata_delete.call_count == 1
    dc_mock().metadata_delete.assert_any_call('10.5072/zenodo.3')
    record = Record.get_record(rec2_id)
    assert record['removed_by'] == users[0]['id']
    assert record['removal_reason'] == 'Spam record, removed by Zenodo staff.'
Example #8
def test_basic_api(app, db, communities, deposit, deposit_file):
    """Test basic workflow using Deposit and Communities API."""
    deposit_v1 = publish_and_expunge(db, deposit)
    depid_v1_value = deposit_v1['_deposit']['id']

    recid_v1, record_v1 = deposit_v1.fetch_published()
    recid_v1_value = recid_v1.pid_value

    deposit_v1.newversion()
    pv = PIDVersioning(child=recid_v1)
    depid_v2 = pv.draft_child_deposit
    deposit_v2 = ZenodoDeposit.get_record(depid_v2.get_assigned_object())
    deposit_v2.files['file.txt'] = BytesIO(b('file1'))
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    deposit_v2 = deposit_v2.edit()
    # 1. Request for 'c1' and 'c2' through deposit v2
    deposit_v2['communities'] = ['c1', 'c2', ]
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    recid_v2, record_v2 = deposit_v2.fetch_published()
    recid_v2_value = recid_v2.pid_value
    depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)
    recid_v1, record_v1 = deposit_v1.fetch_published()
    assert record_v1.get('communities', []) == []
    assert record_v2.get('communities', []) == []

    c1_api = ZenodoCommunity('c1')
    c2_api = ZenodoCommunity('c2')

    # Inclusion requests should be visible for both records
    assert c1_api.get_comm_irs(record_v1, pid=recid_v1).count() == 1
    assert c1_api.get_comm_irs(record_v2, pid=recid_v2).count() == 1

    assert c2_api.get_comm_irs(record_v1, pid=recid_v1).count() == 1
    assert c2_api.get_comm_irs(record_v2, pid=recid_v2).count() == 1

    # Accept to 'c1' through record_v2 (as originally requested),
    # and 'c2' through record_v1 (version)
    c1_api.accept_record(record_v2, pid=recid_v2)
    c2_api.accept_record(record_v1, pid=recid_v1)
    recid_v1, record_v1 = record_resolver.resolve(recid_v1_value)
    recid_v2, record_v2 = record_resolver.resolve(recid_v2_value)
    # Accepting an individual record into a community should propagate the
    # changes to all versions
    assert record_v1['communities'] == record_v2['communities'] == \
        ['c1', 'c2', ]

    # Removing the communities from deposit_v1 should remove them from both
    # published records
    depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)
    deposit_v1 = deposit_v1.edit()
    deposit_v1['communities'] = []
    deposit_v1 = publish_and_expunge(db, deposit_v1)
    recid_v1, record_v1 = record_resolver.resolve(recid_v1_value)
    recid_v2, record_v2 = record_resolver.resolve(recid_v2_value)
    assert record_v1.get('communities', []) == []
    assert record_v2.get('communities', []) == []
Example #9
def _publish_and_expunge(db, deposit):
    """Publish the deposit and expunge the session.

    Use this if you want to be sure that the session is synced with the DB
    after publishing the deposit."""
    deposit.publish()
    dep_uuid = deposit.id
    db.session.commit()
    db.session.expunge_all()
    deposit = Deposit.get_record(dep_uuid)
    return deposit
Example #10
def _publish_and_expunge(db, deposit):
    """Publish the deposit and expunge the session.

    Use this if you want to be sure that the session is synced with the DB
    after publishing the deposit."""
    deposit.publish()
    dep_uuid = deposit.id
    db.session.commit()
    db.session.expunge_all()
    deposit = Deposit.get_record(dep_uuid)
    return deposit
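A usage sketch mirroring the tests above: publish the draft deposit and continue with the session-fresh instance that the helper returns.

deposit_v1 = _publish_and_expunge(db, deposit)
recid_v1, record_v1 = deposit_v1.fetch_published()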
Example #11
def versioning_new_deposit(uuid):
    """Migrate a yet-unpublished deposit to a versioning scheme."""
    deposit = ZenodoDeposit.get_record(uuid)
    if 'conceptrecid' in deposit:
        return
    # ASSERT ZENODO DOI ONLY!
    assert 'conceptrecid' not in deposit, 'Concept RECID already in record.'
    conceptrecid = zenodo_concept_recid_minter(uuid, deposit)
    recid = PersistentIdentifier.get('recid', str(deposit['recid']))
    depid = PersistentIdentifier.get('depid', str(deposit['_deposit']['id']))
    pv = PIDVersioning(parent=conceptrecid)
    pv.insert_draft_child(recid)
    RecordDraft.link(recid, depid)
    deposit.commit()
    db.session.commit()
Example #12
def test_communities_newversion_while_ir_pending_bug(app, db, users,
                                                     communities, deposit,
                                                     deposit_file):
    """Make sure that pending IRs remain after a new version (bug)."""
    deposit['communities'] = ['c1', 'c2']
    deposit_v1 = publish_and_expunge(db, deposit)
    recid_v1, record_v1 = deposit_v1.fetch_published()
    depid_v1_value = deposit_v1['_deposit']['id']
    recid_v1_value = recid_v1.pid_value

    # Two inclusion requests are pending
    assert InclusionRequest.query.count() == 2

    # Accept one community
    c1_api = ZenodoCommunity('c1')
    c1_api.accept_record(record_v1, pid=recid_v1)

    deposit_v1 = deposit_v1.newversion()
    pv = PIDVersioning(child=recid_v1)
    depid_v2 = pv.draft_child_deposit
    depid_v2_value = depid_v2.pid_value

    deposit_v2 = ZenodoDeposit.get_record(depid_v2.get_assigned_object())

    deposit_v2.files['file.txt'] = BytesIO(b('file1'))
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    recid_v2, record_v2 = deposit_v2.fetch_published()

    depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)
    depid_v2, deposit_v2 = deposit_resolver.resolve(depid_v2_value)
    recid_v1, record_v1 = record_resolver.resolve(recid_v1_value)
    # Make sure there is still an IR for community 'c2' after newversion
    assert InclusionRequest.query.count() == 1
    assert InclusionRequest.query.one().id_community == 'c2'
    assert record_v1.get('communities', []) == [
        'c1',
    ]
    assert deposit_v1.get('communities', []) == [
        'c1',
        'c2',
    ]
    assert record_v2.get('communities', []) == [
        'c1',
    ]
    assert deposit_v2.get('communities', []) == [
        'c1',
        'c2',
    ]
Example #13
def test_communities_newversion_addition(app, db, users, communities, deposit,
                                         deposit_file):
    """Make sure that new version of record synchronizes the communities."""
    deposit['communities'] = ['c1', 'c2']
    deposit_v1 = publish_and_expunge(db, deposit)
    recid_v1, record_v1 = deposit_v1.fetch_published()
    depid_v1_value = deposit_v1['_deposit']['id']
    recid_v1_value = recid_v1.pid_value

    c1_api = ZenodoCommunity('c1')
    c2_api = ZenodoCommunity('c2')

    c1_api.accept_record(record_v1, pid=recid_v1)
    c2_api.accept_record(record_v1, pid=recid_v1)

    deposit_v1 = deposit_v1.newversion()
    pv = PIDVersioning(child=recid_v1)
    depid_v2 = pv.draft_child_deposit
    depid_v2_value = depid_v2.pid_value

    deposit_v2 = ZenodoDeposit.get_record(depid_v2.get_assigned_object())

    # Remove 'c2' and request for 'c5'. Make sure that communities from
    # previous record version are preserved/removed properly
    deposit_v2['communities'] = ['c1', 'c5']
    deposit_v2.files['file.txt'] = BytesIO(b('file1'))
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    recid_v2, record_v2 = deposit_v2.fetch_published()

    depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)
    depid_v2, deposit_v2 = deposit_resolver.resolve(depid_v2_value)
    recid_v1, record_v1 = record_resolver.resolve(recid_v1_value)
    assert record_v1.get('communities', []) == [
        'c1',
    ]
    assert deposit_v1.get('communities', []) == [
        'c1',
        'c5',
    ]
    assert record_v2.get('communities', []) == [
        'c1',
    ]
    assert deposit_v2.get('communities', []) == [
        'c1',
        'c5',
    ]
Example #14
def test_communities_newversion_while_ir_pending_bug(
        app, db, users, communities, deposit, deposit_file):
    """Make sure that pending IRs remain after a new version (bug)."""
    deposit['communities'] = ['c1', 'c2']
    deposit_v1 = publish_and_expunge(db, deposit)
    recid_v1, record_v1 = deposit_v1.fetch_published()
    depid_v1_value = deposit_v1['_deposit']['id']
    recid_v1_value = recid_v1.pid_value

    # Two inclusion requests are pending
    assert InclusionRequest.query.count() == 2

    # Accept one community
    c1_api = ZenodoCommunity('c1')
    c1_api.accept_record(record_v1, pid=recid_v1)

    deposit_v1 = deposit_v1.newversion()
    pv = PIDVersioning(child=recid_v1)
    depid_v2 = pv.draft_child_deposit
    depid_v2_value = depid_v2.pid_value

    deposit_v2 = ZenodoDeposit.get_record(depid_v2.get_assigned_object())

    deposit_v2.files['file.txt'] = BytesIO(b('file1'))
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    recid_v2, record_v2 = deposit_v2.fetch_published()

    depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)
    depid_v2, deposit_v2 = deposit_resolver.resolve(depid_v2_value)
    recid_v1, record_v1 = record_resolver.resolve(recid_v1_value)
    # Make sure there is still an IR for community 'c2' after newversion
    assert InclusionRequest.query.count() == 1
    assert InclusionRequest.query.one().id_community == 'c2'
    assert record_v1.get('communities', []) == ['c1', ]
    assert deposit_v1.get('communities', []) == ['c1', 'c2', ]
    assert record_v2.get('communities', []) == ['c1', ]
    assert deposit_v2.get('communities', []) == ['c1', 'c2', ]
Example #15
def test_communities_newversion_addition(
        app, db, users, communities, deposit, deposit_file):
    """Make sure that new version of record synchronizes the communities."""
    deposit['communities'] = ['c1', 'c2']
    deposit_v1 = publish_and_expunge(db, deposit)
    recid_v1, record_v1 = deposit_v1.fetch_published()
    depid_v1_value = deposit_v1['_deposit']['id']
    recid_v1_value = recid_v1.pid_value

    c1_api = ZenodoCommunity('c1')
    c2_api = ZenodoCommunity('c2')

    c1_api.accept_record(record_v1, pid=recid_v1)
    c2_api.accept_record(record_v1, pid=recid_v1)

    deposit_v1 = deposit_v1.newversion()
    pv = PIDVersioning(child=recid_v1)
    depid_v2 = pv.draft_child_deposit
    depid_v2_value = depid_v2.pid_value

    deposit_v2 = ZenodoDeposit.get_record(depid_v2.get_assigned_object())

    # Remove 'c2' and request for 'c5'. Make sure that communities from
    # previous record version are preserved/removed properly
    deposit_v2['communities'] = ['c1', 'c5']
    deposit_v2.files['file.txt'] = BytesIO(b('file1'))
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    recid_v2, record_v2 = deposit_v2.fetch_published()

    depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)
    depid_v2, deposit_v2 = deposit_resolver.resolve(depid_v2_value)
    recid_v1, record_v1 = record_resolver.resolve(recid_v1_value)
    assert record_v1.get('communities', []) == ['c1', ]
    assert deposit_v1.get('communities', []) == ['c1', 'c5', ]
    assert record_v2.get('communities', []) == ['c1', ]
    assert deposit_v2.get('communities', []) == ['c1', 'c5', ]
Example #16
def get_github_repository(pid):
    """Get GitHub repository from depid."""
    depid = fetch_depid(pid)
    # First check if the passed depid is a GitHub release
    release = (Release.query.filter_by(record_id=depid.object_uuid)
               .one_or_none())
    if release:
        return release.repository

    deposit = ZenodoDeposit.get_record(depid.object_uuid)
    conceptrecid = deposit.get('conceptrecid')
    if not conceptrecid:
        return None
    parent = PersistentIdentifier.get(
        pid_type='recid', pid_value=conceptrecid)
    pv = PIDVersioning(parent=parent)
    if pv.exists:
        record_ids = [r.object_uuid for r in pv.children]
        deposit_ids = (rec.depid.object_uuid
                       for rec in ZenodoRecord.get_records(record_ids))
        release = (Release.query
                   .filter(Release.record_id.in_(deposit_ids))
                   .first())
        return release.repository if release else None
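A usage sketch (the PID variable is illustrative): resolve the GitHub repository behind any depid of a versioned deposit; the function returns None when no published release is found among the version siblings.

repository = get_github_repository(depid_v2)
if repository is not None:
    print(repository)  # invenio_github.models.Repository instance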
Example #17
def test_versioning_indexing(db, es, deposit, deposit_file):
    """Test the indexing of 'version' relations."""
    deposit_index_name = 'deposits-records-record-v1.0.0'
    records_index_name = 'records-record-v1.0.0'

    deposit_v1 = publish_and_expunge(db, deposit)
    depid_v1_value = deposit_v1['_deposit']['id']
    recid_v1, record_v1 = deposit_v1.fetch_published()
    recid_v1_value = recid_v1.pid_value

    RecordIndexer().index_by_id(str(record_v1.id))
    RecordIndexer().process_bulk_queue()
    current_search.flush_and_refresh(index=deposit_index_name)
    current_search.flush_and_refresh(index=records_index_name)
    s_dep = current_search.client.search(
        index=deposit_index_name)['hits']['hits']
    s_rec = current_search.client.search(
        index=records_index_name)['hits']['hits']
    assert len(s_dep) == 1
    assert len(s_rec) == 1
    assert 'relations' in s_dep[0]['_source']
    assert 'relations' in s_rec[0]['_source']

    expected = {
        "version": [{
            "draft_child_deposit": None,
            "index": 0,
            "is_last": True,
            "last_child": {
                "pid_type": "recid",
                "pid_value": "2"
            },
            "count": 1,
            "parent": {
                "pid_type": "recid",
                "pid_value": "1"
            },
        }]
    }
    assert s_dep[0]['_source']['relations'] == expected
    assert s_rec[0]['_source']['relations'] == expected

    deposit_v1.newversion()
    pv = PIDVersioning(child=recid_v1)
    depid_v2 = pv.draft_child_deposit
    deposit_v2 = ZenodoDeposit.get_record(depid_v2.object_uuid)
    deposit_v2.files['file.txt'] = BytesIO(b('file1'))
    depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)

    RecordIndexer().process_bulk_queue()
    current_search.flush_and_refresh(index=deposit_index_name)
    current_search.flush_and_refresh(index=records_index_name)
    s_dep = current_search.client.search(
        index=deposit_index_name)['hits']['hits']
    s_rec = current_search.client.search(
        index=records_index_name)['hits']['hits']

    assert len(s_dep) == 2  # Two deposits should be indexed
    assert len(s_rec) == 1  # One, since record does not exist yet

    s_dep1 = current_search.client.get(index=deposit_index_name,
                                       id=deposit_v1.id)
    s_dep2 = current_search.client.get(index=deposit_index_name,
                                       id=deposit_v2.id)

    expected_d1 = {
        "version": [{
            "draft_child_deposit": {
                "pid_type": "depid",
                "pid_value": "3"
            },
            "index": 0,
            "is_last": False,
            "last_child": {
                "pid_type": "recid",
                "pid_value": "2"
            },
            "parent": {
                "pid_type": "recid",
                "pid_value": "1"
            },
            "count": 2  # For deposit, draft children are also counted
        }]
    }
    expected_d2 = {
        "version": [{
            "draft_child_deposit": {
                "pid_type": "depid",
                "pid_value": "3"
            },
            "index": 1,
            "is_last": True,
            "last_child": {
                "pid_type": "recid",
                "pid_value": "2"
            },
            "count": 2,  # For deposit, draft children are also counted
            "parent": {
                "pid_type": "recid",
                "pid_value": "1"
            },
        }]
    }

    assert s_dep1['_source']['relations'] == expected_d1
    assert s_dep2['_source']['relations'] == expected_d2

    deposit_v2 = publish_and_expunge(db, deposit_v2)
    recid_v2, record_v2 = deposit_v2.fetch_published()
    recid_v1, record_v1 = record_resolver.resolve(recid_v1_value)
    depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)

    RecordIndexer().index_by_id(str(record_v2.id))
    RecordIndexer().process_bulk_queue()
    current_search.flush_and_refresh(index=deposit_index_name)
    current_search.flush_and_refresh(index=records_index_name)

    s_dep = current_search.client.search(
        index=deposit_index_name)['hits']['hits']
    s_rec = current_search.client.search(
        index=records_index_name)['hits']['hits']
    assert len(s_dep) == 2
    assert len(s_rec) == 2

    s_dep1 = current_search.client.get(index=deposit_index_name,
                                       id=deposit_v1.id)
    s_dep2 = current_search.client.get(index=deposit_index_name,
                                       id=deposit_v2.id)

    s_rec1 = current_search.client.get(index=records_index_name,
                                       id=record_v1.id)
    s_rec2 = current_search.client.get(index=records_index_name,
                                       id=record_v2.id)

    expected_d1 = {
        "version": [{
            "draft_child_deposit": None,
            "index": 0,
            "is_last": False,
            "last_child": {
                "pid_type": "recid",
                "pid_value": "3"
            },
            "parent": {
                "pid_type": "recid",
                "pid_value": "1"
            },
            "count": 2
        }]
    }
    expected_d2 = {
        "version": [{
            "draft_child_deposit": None,
            "index": 1,
            "is_last": True,
            "last_child": {
                "pid_type": "recid",
                "pid_value": "3"
            },
            "count": 2,
            "parent": {
                "pid_type": "recid",
                "pid_value": "1"
            },
        }]
    }
    assert s_dep1['_source']['relations'] == expected_d1
    assert s_dep2['_source']['relations'] == expected_d2

    expected_r1 = {
        "version": [{
            "draft_child_deposit": None,
            "index": 0,
            "is_last": False,
            "last_child": {
                "pid_type": "recid",
                "pid_value": "3"
            },
            "parent": {
                "pid_type": "recid",
                "pid_value": "1"
            },
            "count": 2
        }]
    }
    expected_r2 = {
        "version": [{
            "draft_child_deposit": None,
            "index": 1,
            "is_last": True,
            "last_child": {
                "pid_type": "recid",
                "pid_value": "3"
            },
            "count": 2,
            "parent": {
                "pid_type": "recid",
                "pid_value": "1"
            },
        }]
    }
    assert s_rec1['_source']['relations'] == expected_r1
    assert s_rec2['_source']['relations'] == expected_r2
Example #18
def test_relations_serialization(app, db, deposit, deposit_file):
    """Serialize PID relations."""
    deposit_v1 = publish_and_expunge(db, deposit)
    depid_v1_value = deposit_v1['_deposit']['id']
    depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)

    recid_v1, record_v1 = deposit_v1.fetch_published()
    expected = {
        "version": [
            {
                "draft_child_deposit": None,
                "index": 0,
                "is_last": True,
                "last_child": {
                    "pid_type": "recid",
                    "pid_value": "2"
                },
                "parent": {
                    "pid_type": "recid",
                    "pid_value": "1"
                },
                "count": 1
            }
        ]
    }
    assert serialize_relations(recid_v1) == expected

    deposit_v1.newversion()
    # Should contain "draft_child_deposit" information
    expected = {
        "version": [
            {
                "draft_child_deposit": {
                    "pid_type": "depid",
                    "pid_value": "3"
                },
                "index": 0,
                "is_last": True,
                "last_child": {
                    "pid_type": "recid",
                    "pid_value": "2"
                },
                "count": 1,
                "parent": {
                    "pid_type": "recid",
                    "pid_value": "1"
                },
            }
        ]
    }
    assert serialize_relations(recid_v1) == expected

    # Publish the new version
    pv = PIDVersioning(child=recid_v1)
    depid_v2 = pv.draft_child_deposit
    deposit_v2 = ZenodoDeposit.get_record(depid_v2.get_assigned_object())
    deposit_v2.files['file.txt'] = BytesIO(b('file1'))
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    recid_v2, record_v2 = deposit_v2.fetch_published()
    depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)
    recid_v1, record_v1 = deposit_v1.fetch_published()

    # Should no longer contain "draft_child_deposit" info after publishing
    # and no longer be the last child
    expected = {
        "version": [
            {
                "draft_child_deposit": None,
                "index": 0,
                "is_last": False,
                "last_child": {
                    "pid_type": "recid",
                    "pid_value": "3"
                },
                "parent": {
                    "pid_type": "recid",
                    "pid_value": "1"
                },
                "count": 2
            }
        ]
    }
    assert serialize_relations(recid_v1) == expected

    # New version should be the last child now
    expected = {
        "version": [
            {
                "draft_child_deposit": None,
                "index": 1,
                "is_last": True,
                "last_child": {
                    "pid_type": "recid",
                    "pid_value": "3"
                },
                "count": 2,
                "parent": {
                    "pid_type": "recid",
                    "pid_value": "1"
                },
            }
        ]
    }
    assert serialize_relations(recid_v2) == expected
Example #19
def test_related_identifiers_serialization(app, db, deposit, deposit_file):
    """Serialize PID Relations to related identifiers."""
    deposit_v1 = publish_and_expunge(db, deposit)
    depid_v1_value = deposit_v1['_deposit']['id']

    recid_v1, record_v1 = deposit_v1.fetch_published()

    deposit_v1.newversion()
    pv = PIDVersioning(child=recid_v1)
    depid_v2 = pv.draft_child_deposit
    deposit_v2 = ZenodoDeposit.get_record(depid_v2.get_assigned_object())
    deposit_v2.files['file.txt'] = BytesIO(b('file1'))
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    deposit_v2 = deposit_v2.edit()
    # 1. Request for 'c1' and 'c2' through deposit v2
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    recid_v2, record_v2 = deposit_v2.fetch_published()
    depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)
    recid_v1, record_v1 = deposit_v1.fetch_published()

    depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)

    rids = serialize_related_identifiers(recid_v1)
    expected_v1 = [
        {
            'scheme': 'doi',
            'identifier': '10.5072/zenodo.1',
            'relation': 'isPartOf'
        }
        # TODO: serialization of new version relations is disabled
        # {
        #     'scheme': 'doi',
        #     'identifier': '10.5072/zenodo.3',
        #     'relation': 'isPreviousVersionOf'
        # }
    ]
    assert rids == expected_v1

    rids = serialize_related_identifiers(recid_v2)
    expected_v2 = [
        {
            'scheme': 'doi',
            'identifier': '10.5072/zenodo.1',
            'relation': 'isPartOf'
        }
        # TODO: serialization of new version relations is disabled
        # {
        #     'scheme': 'doi',
        #     'identifier': '10.5072/zenodo.2',
        #     'relation': 'isNewVersionOf'
        # }
    ]
    assert rids == expected_v2
    parent_pid = PersistentIdentifier.get('recid', '1')
    rids = serialize_related_identifiers(parent_pid)

    expected_parent = [
        {
            'relation': 'hasPart',
            'scheme': 'doi',
            'identifier': '10.5072/zenodo.2'
        },
        {
            'relation': 'hasPart',
            'scheme': 'doi',
            'identifier': '10.5072/zenodo.3'
        }
    ]
    assert rids == expected_parent
Example #20
def versioning_github_repository(uuid):
    """
    Migrate the GitHub repositories.

    :param uuid: UUID of the repository (invenio_github.models.Repository)
    """
    from invenio_github.models import Repository, ReleaseStatus
    from zenodo.modules.deposit.minters import zenodo_concept_recid_minter
    from zenodo.modules.records.minters import zenodo_concept_doi_minter
    from invenio_pidrelations.contrib.records import index_siblings

    repository = Repository.query.get(uuid)
    published_releases = repository.releases.filter_by(
        status=ReleaseStatus.PUBLISHED).all()

    # Nothing to migrate if no successful release was ever made
    if not published_releases:
        return

    deposits = [
        ZenodoDeposit.get_record(r.record_id) for r in published_releases
        if r.recordmetadata.json is not None
    ]
    deposits = [dep for dep in deposits if 'removed_by' not in dep]
    deposits = sorted(deposits, key=lambda dep: int(dep['recid']))

    recids = [
        PersistentIdentifier.get('recid', dep['recid']) for dep in deposits
    ]
    records = [ZenodoRecord.get_record(p.object_uuid) for p in recids]

    # There were successful releases, but the deposits/records have been
    # removed since then
    if not records:
        return

    assert not any('conceptrecid' in rec for rec in records), \
        "One or more of the release records have been already migrated"
    assert not any('conceptrecid' in dep for dep in deposits), \
        "One or more of the release deposits have been already migrated"

    conceptrecid = zenodo_concept_recid_minter(record_uuid=records[0].id,
                                               data=records[0])
    conceptrecid.register()

    # Mint the Concept DOI if we are migrating (linking) more than one record
    if len(records) > 1:
        conceptdoi = zenodo_concept_doi_minter(records[0].id, records[0])
    else:
        conceptdoi = None

    rec_comms = sorted(
        set(sum([rec.get('communities', []) for rec in records], [])))

    dep_comms = sorted(
        set(sum([dep.get('communities', []) for dep in deposits], [])))

    for rec in records:
        rec['conceptrecid'] = conceptrecid.pid_value
        if conceptdoi:
            rec['conceptdoi'] = conceptdoi.pid_value
        if rec_comms:
            rec['communities'] = rec_comms
        rec.commit()

    for dep in deposits:
        dep['conceptrecid'] = conceptrecid.pid_value
        if conceptdoi:
            dep['conceptdoi'] = conceptdoi.pid_value
        if dep_comms:
            dep['communities'] = dep_comms
        dep.commit()

    pv = PIDVersioning(parent=conceptrecid)
    for recid in recids:
        pv.insert_child(recid)
    pv.update_redirect()

    if current_app.config['DEPOSIT_DATACITE_MINTING_ENABLED']:
        datacite_register.delay(recids[-1].pid_value, str(records[-1].id))
    db.session.commit()

    # Reindex all siblings
    index_siblings(pv.last_child, with_deposits=True)
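As with the record migration in Example #3, the driver is not shown; a minimal sketch, assuming the migration is run once per repository row (the query is illustrative, and repositories without published releases return early):

from invenio_github.models import Repository


def versioning_all_github_repositories():
    # Hypothetical driver: run the migration once per GitHub repository row.
    for (repo_id,) in Repository.query.with_entities(Repository.id):
        versioning_github_repository(repo_id)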
Example #21
def test_autoadd(app, db, users, communities, deposit, deposit_file,
                 communities_autoadd_enabled):
    """Test basic workflow using Deposit and Communities API."""
    deposit_v1 = publish_and_expunge(db, deposit)
    depid_v1_value = deposit_v1['_deposit']['id']

    recid_v1, record_v1 = deposit_v1.fetch_published()
    recid_v1_value = recid_v1.pid_value

    deposit_v1 = deposit_v1.newversion()
    pv = PIDVersioning(child=recid_v1)
    depid_v2 = pv.draft_child_deposit
    depid_v2_value = depid_v2.pid_value
    deposit_v2 = ZenodoDeposit.get_record(depid_v2.get_assigned_object())
    deposit_v2.files['file.txt'] = BytesIO(b('file1'))
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    deposit_v2 = deposit_v2.edit()
    # 1. Request for 'c1' and 'c3' (owned by user) through deposit v2
    deposit_v2['communities'] = [
        'c1',
        'c2',
        'c3',
    ]
    deposit_v2['grants'] = [
        {
            'title': 'SomeGrant'
        },
    ]
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    recid_v2, record_v2 = deposit_v2.fetch_published()
    assert record_v2['grants'] == [
        {
            'title': 'SomeGrant'
        },
    ]
    recid_v2_value = recid_v2.pid_value
    depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)
    recid_v1, record_v1 = deposit_v1.fetch_published()
    assert record_v1.get('communities', []) == ['c3', 'grants_comm']
    assert record_v2.get('communities', []) == ['c3', 'grants_comm']
    assert deposit_v1.get('communities', []) == [
        'c1', 'c2', 'c3', 'ecfunded', 'grants_comm', 'zenodo'
    ]
    assert deposit_v2.get('communities', []) == [
        'c1', 'c2', 'c3', 'ecfunded', 'grants_comm', 'zenodo'
    ]

    c1_api = ZenodoCommunity('c1')
    c2_api = ZenodoCommunity('c2')
    c3_api = ZenodoCommunity('c3')
    grants_comm_api = ZenodoCommunity('grants_comm')
    ecfunded_api = ZenodoCommunity('ecfunded')
    zenodo_api = ZenodoCommunity('zenodo')

    # Inclusion requests should be visible for both records
    assert c1_api.get_comm_irs(record_v1, pid=recid_v1).count() == 1
    assert c1_api.get_comm_irs(record_v2, pid=recid_v2).count() == 1
    assert c2_api.get_comm_irs(record_v1, pid=recid_v1).count() == 1
    assert c2_api.get_comm_irs(record_v2, pid=recid_v2).count() == 1
    assert c3_api.get_comm_irs(record_v1, pid=recid_v1).count() == 0
    assert c3_api.get_comm_irs(record_v2, pid=recid_v2).count() == 0
    assert grants_comm_api.get_comm_irs(record_v1, pid=recid_v1).count() == 0
    assert grants_comm_api.get_comm_irs(record_v2, pid=recid_v2).count() == 0
    assert ecfunded_api.get_comm_irs(record_v1, pid=recid_v1).count() == 1
    assert ecfunded_api.get_comm_irs(record_v2, pid=recid_v2).count() == 1
    assert zenodo_api.get_comm_irs(record_v1, pid=recid_v1).count() == 1
    assert zenodo_api.get_comm_irs(record_v2, pid=recid_v2).count() == 1

    # Accept to 'c1' through record_v2 (as originally requested),
    # and 'c2' through record_v1 (resolved through version)
    c1_api.accept_record(record_v2, pid=recid_v2)
    c2_api.accept_record(record_v1, pid=recid_v1)
    recid_v1, record_v1 = record_resolver.resolve(recid_v1_value)
    recid_v2, record_v2 = record_resolver.resolve(recid_v2_value)
    # Accepting an individual record into a community should propagate the
    # changes to all versions
    assert record_v1.get('communities',
                         []) == ['c1', 'c2', 'c3', 'grants_comm']
    assert record_v2.get('communities',
                         []) == ['c1', 'c2', 'c3', 'grants_comm']
    assert deposit_v1.get('communities', []) == [
        'c1', 'c2', 'c3', 'ecfunded', 'grants_comm', 'zenodo'
    ]
    assert deposit_v2.get('communities', []) == [
        'c1', 'c2', 'c3', 'ecfunded', 'grants_comm', 'zenodo'
    ]

    # Removing 'c1'-'c3' from deposit_v1 should remove them from both
    # published records and from the other deposits as well
    depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)
    deposit_v1 = deposit_v1.edit()
    deposit_v1['communities'] = []
    deposit_v1 = publish_and_expunge(db, deposit_v1)
    depid_v2, deposit_v2 = deposit_resolver.resolve(depid_v2_value)
    recid_v1, record_v1 = record_resolver.resolve(recid_v1_value)
    recid_v2, record_v2 = record_resolver.resolve(recid_v2_value)
    assert record_v1.get('communities', []) == [
        'grants_comm',
    ]
    assert record_v2.get('communities', []) == [
        'grants_comm',
    ]
    assert deposit_v1.get('communities',
                          []) == ['ecfunded', 'grants_comm', 'zenodo']
    assert deposit_v2.get('communities',
                          []) == ['ecfunded', 'grants_comm', 'zenodo']
Example #22
def test_relations_serialization(app, db, deposit, deposit_file):
    """Serialize PID relations."""
    deposit_v1 = publish_and_expunge(db, deposit)
    depid_v1_value = deposit_v1['_deposit']['id']
    depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)

    recid_v1, record_v1 = deposit_v1.fetch_published()
    expected = {
        "version": [{
            "draft_child_deposit": None,
            "index": 0,
            "is_last": True,
            "last_child": {
                "pid_type": "recid",
                "pid_value": "2"
            },
            "parent": {
                "pid_type": "recid",
                "pid_value": "1"
            },
            "count": 1
        }]
    }
    assert serialize_relations(recid_v1) == expected

    deposit_v1.newversion()
    # Should contain "draft_child_deposit" information
    expected = {
        "version": [{
            "draft_child_deposit": {
                "pid_type": "depid",
                "pid_value": "3"
            },
            "index": 0,
            "is_last": True,
            "last_child": {
                "pid_type": "recid",
                "pid_value": "2"
            },
            "count": 1,
            "parent": {
                "pid_type": "recid",
                "pid_value": "1"
            },
        }]
    }
    assert serialize_relations(recid_v1) == expected

    # Publish the new version
    pv = PIDVersioning(child=recid_v1)
    depid_v2 = pv.draft_child_deposit
    deposit_v2 = ZenodoDeposit.get_record(depid_v2.get_assigned_object())
    deposit_v2.files['file.txt'] = BytesIO(b('file1'))
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    recid_v2, record_v2 = deposit_v2.fetch_published()
    depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)
    recid_v1, record_v1 = deposit_v1.fetch_published()

    # Should no longer contain "draft_child_deposit" info after publishing
    # and no longer be the last child
    expected = {
        "version": [{
            "draft_child_deposit": None,
            "index": 0,
            "is_last": False,
            "last_child": {
                "pid_type": "recid",
                "pid_value": "3"
            },
            "parent": {
                "pid_type": "recid",
                "pid_value": "1"
            },
            "count": 2
        }]
    }
    assert serialize_relations(recid_v1) == expected

    # New version should be the last child now
    expected = {
        "version": [{
            "draft_child_deposit": None,
            "index": 1,
            "is_last": True,
            "last_child": {
                "pid_type": "recid",
                "pid_value": "3"
            },
            "count": 2,
            "parent": {
                "pid_type": "recid",
                "pid_value": "1"
            },
        }]
    }
    assert serialize_relations(recid_v2) == expected
Example #23
def test_versioning_indexing(db, es, deposit, deposit_file):
    """Test the indexing of 'version' relations."""
    deposit_index_name = 'deposits-records-record-v1.0.0'
    records_index_name = 'records-record-v1.0.0'

    deposit_v1 = publish_and_expunge(db, deposit)
    depid_v1_value = deposit_v1['_deposit']['id']
    recid_v1, record_v1 = deposit_v1.fetch_published()
    recid_v1_value = recid_v1.pid_value

    RecordIndexer().index_by_id(str(record_v1.id))
    RecordIndexer().process_bulk_queue()
    current_search.flush_and_refresh(index=deposit_index_name)
    current_search.flush_and_refresh(index=records_index_name)
    s_dep = current_search.client.search(
        index=deposit_index_name)['hits']['hits']
    s_rec = current_search.client.search(
        index=records_index_name)['hits']['hits']
    assert len(s_dep) == 1
    assert len(s_rec) == 1
    assert 'relations' in s_dep[0]['_source']
    assert 'relations' in s_rec[0]['_source']

    expected = {
        "version": [
            {
                "draft_child_deposit": None,
                "index": 0,
                "is_last": True,
                "last_child": {
                    "pid_type": "recid",
                    "pid_value": "2"
                },
                "count": 1,
                "parent": {
                    "pid_type": "recid",
                    "pid_value": "1"
                },
            }
        ]
    }
    assert s_dep[0]['_source']['relations'] == expected
    assert s_rec[0]['_source']['relations'] == expected

    deposit_v1.newversion()
    pv = PIDVersioning(child=recid_v1)
    depid_v2 = pv.draft_child_deposit
    deposit_v2 = ZenodoDeposit.get_record(depid_v2.object_uuid)
    depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)

    RecordIndexer().process_bulk_queue()
    current_search.flush_and_refresh(index=deposit_index_name)
    current_search.flush_and_refresh(index=records_index_name)
    s_dep = current_search.client.search(
        index=deposit_index_name)['hits']['hits']
    s_rec = current_search.client.search(
        index=records_index_name)['hits']['hits']

    assert len(s_dep) == 2  # Two deposits should be indexed
    assert len(s_rec) == 1  # One, since record does not exist yet

    s_dep1 = current_search.client.get(
        index=deposit_index_name, id=deposit_v1.id)
    s_dep2 = current_search.client.get(
        index=deposit_index_name, id=deposit_v2.id)

    expected_d1 = {
        "version": [
            {
                "draft_child_deposit": {
                    "pid_type": "depid",
                    "pid_value": "3"
                },
                "index": 0,
                "is_last": False,
                "last_child": {
                    "pid_type": "recid",
                    "pid_value": "2"
                },
                "parent": {
                    "pid_type": "recid",
                    "pid_value": "1"
                },
                "count": 2  # For deposit, draft children are also counted
            }
        ]
    }
    expected_d2 = {
        "version": [
            {
                "draft_child_deposit": {
                    "pid_type": "depid",
                    "pid_value": "3"
                },
                "index": 1,
                "is_last": True,
                "last_child": {
                    "pid_type": "recid",
                    "pid_value": "2"
                },
                "count": 2,  # For deposit, draft children are also counted
                "parent": {
                    "pid_type": "recid",
                    "pid_value": "1"
                },
            }
        ]
    }

    assert s_dep1['_source']['relations'] == expected_d1
    assert s_dep2['_source']['relations'] == expected_d2

    deposit_v2 = publish_and_expunge(db, deposit_v2)
    recid_v2, record_v2 = deposit_v2.fetch_published()
    recid_v1, record_v1 = record_resolver.resolve(recid_v1_value)
    depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)

    RecordIndexer().index_by_id(str(record_v2.id))
    RecordIndexer().process_bulk_queue()
    current_search.flush_and_refresh(index=deposit_index_name)
    current_search.flush_and_refresh(index=records_index_name)

    s_dep = current_search.client.search(
        index=deposit_index_name)['hits']['hits']
    s_rec = current_search.client.search(
        index=records_index_name)['hits']['hits']
    assert len(s_dep) == 2
    assert len(s_rec) == 2

    s_dep1 = current_search.client.get(
        index=deposit_index_name, id=deposit_v1.id)
    s_dep2 = current_search.client.get(
        index=deposit_index_name, id=deposit_v2.id)

    s_rec1 = current_search.client.get(
        index=records_index_name, id=record_v1.id)
    s_rec2 = current_search.client.get(
        index=records_index_name, id=record_v2.id)

    expected_d1 = {
        "version": [
            {
                "draft_child_deposit": None,
                "index": 0,
                "is_last": False,
                "last_child": {
                    "pid_type": "recid",
                    "pid_value": "3"
                },
                "parent": {
                    "pid_type": "recid",
                    "pid_value": "1"
                },
                "count": 2
            }
        ]
    }
    expected_d2 = {
        "version": [
            {
                "draft_child_deposit": None,
                "index": 1,
                "is_last": True,
                "last_child": {
                    "pid_type": "recid",
                    "pid_value": "3"
                },
                "count": 2,
                "parent": {
                    "pid_type": "recid",
                    "pid_value": "1"
                },
            }
        ]
    }
    assert s_dep1['_source']['relations'] == expected_d1
    assert s_dep2['_source']['relations'] == expected_d2

    expected_r1 = {
        "version": [
            {
                "draft_child_deposit": None,
                "index": 0,
                "is_last": False,
                "last_child": {
                    "pid_type": "recid",
                    "pid_value": "3"
                },
                "parent": {
                    "pid_type": "recid",
                    "pid_value": "1"
                },
                "count": 2
            }
        ]
    }
    expected_r2 = {
        "version": [
            {
                "draft_child_deposit": None,
                "index": 1,
                "is_last": True,
                "last_child": {
                    "pid_type": "recid",
                    "pid_value": "3"
                },
                "count": 2,
                "parent": {
                    "pid_type": "recid",
                    "pid_value": "1"
                },
            }
        ]
    }
    assert s_rec1['_source']['relations'] == expected_r1
    assert s_rec2['_source']['relations'] == expected_r2
Example #24
def test_autoadd(app, db, users, communities, deposit, deposit_file,
                 communities_autoadd_enabled):
    """Test basic workflow using Deposit and Communities API."""
    deposit_v1 = publish_and_expunge(db, deposit)
    depid_v1_value = deposit_v1['_deposit']['id']

    recid_v1, record_v1 = deposit_v1.fetch_published()
    recid_v1_value = recid_v1.pid_value

    deposit_v1 = deposit_v1.newversion()
    pv = PIDVersioning(child=recid_v1)
    depid_v2 = pv.draft_child_deposit
    depid_v2_value = depid_v2.pid_value
    deposit_v2 = ZenodoDeposit.get_record(depid_v2.get_assigned_object())
    deposit_v2.files['file.txt'] = BytesIO(b('file1'))
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    deposit_v2 = deposit_v2.edit()
    # 1. Request for 'c1' and 'c3' (owned by user) through deposit v2
    deposit_v2['communities'] = ['c1', 'c2', 'c3', ]
    deposit_v2['grants'] = [{'title': 'SomeGrant'}, ]
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    recid_v2, record_v2 = deposit_v2.fetch_published()
    assert record_v2['grants'] == [{'title': 'SomeGrant'}, ]
    recid_v2_value = recid_v2.pid_value
    depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)
    recid_v1, record_v1 = deposit_v1.fetch_published()
    assert record_v1.get('communities', []) == ['c3', 'grants_comm']
    assert record_v2.get('communities', []) == ['c3', 'grants_comm']
    assert deposit_v1.get('communities', []) == ['c1', 'c2', 'c3', 'ecfunded',
                                                 'grants_comm', 'zenodo']
    assert deposit_v2.get('communities', []) == ['c1', 'c2', 'c3', 'ecfunded',
                                                 'grants_comm', 'zenodo']

    c1_api = ZenodoCommunity('c1')
    c2_api = ZenodoCommunity('c2')
    c3_api = ZenodoCommunity('c3')
    grants_comm_api = ZenodoCommunity('grants_comm')
    ecfunded_api = ZenodoCommunity('ecfunded')
    zenodo_api = ZenodoCommunity('zenodo')

    # Inclusion requests should be visible for both records
    assert c1_api.get_comm_irs(record_v1, pid=recid_v1).count() == 1
    assert c1_api.get_comm_irs(record_v2, pid=recid_v2).count() == 1
    assert c2_api.get_comm_irs(record_v1, pid=recid_v1).count() == 1
    assert c2_api.get_comm_irs(record_v2, pid=recid_v2).count() == 1
    assert c3_api.get_comm_irs(record_v1, pid=recid_v1).count() == 0
    assert c3_api.get_comm_irs(record_v2, pid=recid_v2).count() == 0
    assert grants_comm_api.get_comm_irs(
        record_v1, pid=recid_v1).count() == 0
    assert grants_comm_api.get_comm_irs(
        record_v2, pid=recid_v2).count() == 0
    assert ecfunded_api.get_comm_irs(
        record_v1, pid=recid_v1).count() == 1
    assert ecfunded_api.get_comm_irs(
        record_v2, pid=recid_v2).count() == 1
    assert zenodo_api.get_comm_irs(record_v1, pid=recid_v1).count() == 1
    assert zenodo_api.get_comm_irs(record_v2, pid=recid_v2).count() == 1

    # Accept to 'c1' through record_v2 (as originally requested),
    # and 'c2' through record_v1 (resolved through version)
    c1_api.accept_record(record_v2, pid=recid_v2)
    c2_api.accept_record(record_v1, pid=recid_v1)
    recid_v1, record_v1 = record_resolver.resolve(recid_v1_value)
    recid_v2, record_v2 = record_resolver.resolve(recid_v2_value)
    # Accepting individual record to a community should propagate the changes
    # to all versions
    assert record_v1.get('communities', []) == ['c1', 'c2', 'c3',
                                                'grants_comm']
    assert record_v2.get('communities', []) == ['c1', 'c2', 'c3',
                                                'grants_comm']
    assert deposit_v1.get('communities', []) == ['c1', 'c2', 'c3', 'ecfunded',
                                                 'grants_comm', 'zenodo']
    assert deposit_v2.get('communities', []) == ['c1', 'c2', 'c3', 'ecfunded',
                                                 'grants_comm', 'zenodo']

    # Removing 'c1'-'c3' from deposit_v1 should remove them from both
    # published records and from the other deposits as well
    depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)
    deposit_v1 = deposit_v1.edit()
    deposit_v1['communities'] = []
    deposit_v1 = publish_and_expunge(db, deposit_v1)
    depid_v2, deposit_v2 = deposit_resolver.resolve(depid_v2_value)
    recid_v1, record_v1 = record_resolver.resolve(recid_v1_value)
    recid_v2, record_v2 = record_resolver.resolve(recid_v2_value)
    assert record_v1.get('communities', []) == ['grants_comm', ]
    assert record_v2.get('communities', []) == ['grants_comm', ]
    assert deposit_v1.get('communities', []) == ['ecfunded', 'grants_comm',
                                                 'zenodo']
    assert deposit_v2.get('communities', []) == ['ecfunded', 'grants_comm',
                                                 'zenodo']
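The tests above lean on the publish_and_expunge helper; a plausible sketch of what such a helper does (publish, commit, clear the session, and re-fetch the deposit so no stale in-memory state leaks between steps) — the actual test helper may differ:

def publish_and_expunge(db, deposit):
    """Publish a deposit and return a freshly loaded instance (sketch).

    Hedged sketch only: the real helper used by these tests may differ, but
    the idea is to publish, commit, expunge the SQLAlchemy session, and
    re-fetch the deposit by its UUID.
    """
    deposit.publish()
    deposit_id = deposit.id
    db.session.commit()
    db.session.expunge_all()
    return ZenodoDeposit.get_record(deposit_id)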
Ejemplo n.º 25
0
def test_archiving(app, db, deposit, deposit_file, locations, archive_fs):
    """Test ZenodoSIP archiving."""
    # Stash the configuration and enable writing
    orig = app.config['SIPSTORE_ARCHIVER_WRITING_ENABLED']
    app.config['SIPSTORE_ARCHIVER_WRITING_ENABLED'] = True
    deposit.files['test2.txt'] = BytesIO(b'test-two')
    deposit_v1 = publish_and_expunge(db, deposit)
    recid_v1, record_v1 = deposit_v1.fetch_published()
    recid_v1_id = recid_v1.id
    # Record files after publishing: 'test.txt', 'test2.txt'

    sip1 = SIP(SIPModel.query.one())
    sip1_id = sip1.id

    # Edit the metadata
    deposit_v1 = deposit_v1.edit()
    deposit_v1['title'] = "New title"
    deposit_v1 = publish_and_expunge(db, deposit_v1)
    # Record files after publishing: 'test.txt', 'test2.txt'
    sip2_id = SIPModel.query.order_by(SIPModel.created.desc()).first().id

    # Create a new version
    deposit_v1.newversion()
    recid_v1 = PersistentIdentifier.query.get(recid_v1_id)
    pv = PIDVersioning(child=recid_v1)
    depid_v2 = pv.draft_child_deposit
    deposit_v2 = ZenodoDeposit.get_record(depid_v2.object_uuid)
    del deposit_v2.files['test.txt']
    deposit_v2.files['test3.txt'] = BytesIO(b('test-three'))
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    # Record files after publishing: 'test2.txt', 'test3.txt'

    sip1 = SIP(SIPModel.query.get(sip1_id))
    sip2 = SIP(SIPModel.query.get(sip2_id))
    sip3 = SIP(SIPModel.query.order_by(SIPModel.created.desc()).first())

    # Because we are using secure_filename when writing SIPFiles we need to
    # generate the correct names: <SIPFile.id>-<secure_filename>
    s1_file1_fn = '{0}-test.txt'.format(fetch_suff(sip1, 'test.txt').file_id)
    s1_file1_fp = 'data/files/{0}'.format(s1_file1_fn)

    s1_file2_fn = '{0}-test2.txt'.format(fetch_suff(sip1, 'test2.txt').file_id)
    s1_file2_fp = 'data/files/{0}'.format(s1_file2_fn)

    s3_file2_fn = '{0}-test2.txt'.format(fetch_suff(sip3, 'test2.txt').file_id)
    s3_file2_fp = 'data/files/{0}'.format(s3_file2_fn)

    s3_file3_fn = '{0}-test3.txt'.format(fetch_suff(sip3, 'test3.txt').file_id)
    s3_file3_fp = 'data/files/{0}'.format(s3_file3_fn)

    sip1_bagmeta = json.loads(next(
        m.content for m in sip1.metadata if m.type.name == 'bagit'))['files']
    sip2_bagmeta = json.loads(next(
        m.content for m in sip2.metadata if m.type.name == 'bagit'))['files']
    sip3_bagmeta = json.loads(next(
        m.content for m in sip3.metadata if m.type.name == 'bagit'))['files']

    # Check if Bagit metadata contains the correct file-fetching information
    assert set([f['filepath'] for f in sip1_bagmeta]) == \
        set([s1_file1_fp,
             s1_file2_fp,
             'data/filenames.txt',
             'data/metadata/record-json.json', 'bag-info.txt',
             'manifest-md5.txt', 'bagit.txt', 'tagmanifest-md5.txt'])
    assert not BagItArchiver._is_fetched(
        get_m_item(sip1_bagmeta, s1_file1_fp))
    assert not BagItArchiver._is_fetched(
        get_m_item(sip1_bagmeta, s1_file2_fp))

    assert set([f['filepath'] for f in sip2_bagmeta]) == \
        set([s1_file1_fp,
             s1_file2_fp,
             'data/filenames.txt',
             'data/metadata/record-json.json', 'bag-info.txt',
             'manifest-md5.txt', 'bagit.txt', 'tagmanifest-md5.txt',
             'fetch.txt'])
    # Both files should be fetched since this is a metadata-only edit
    # submission
    assert BagItArchiver._is_fetched(
        get_m_item(sip2_bagmeta, s1_file1_fp))
    assert BagItArchiver._is_fetched(
        get_m_item(sip2_bagmeta, s1_file2_fp))

    assert set([f['filepath'] for f in sip3_bagmeta]) == \
        set([s3_file2_fp,
             s3_file3_fp,
             'data/filenames.txt',
             'data/metadata/record-json.json', 'bag-info.txt',
             'manifest-md5.txt', 'bagit.txt', 'tagmanifest-md5.txt',
             'fetch.txt'])

    # The first file should be fetched from the previous version and the new
    # file should be archived in this bag.
    assert BagItArchiver._is_fetched(
        get_m_item(sip3_bagmeta, s3_file2_fp))
    assert not BagItArchiver._is_fetched(
        get_m_item(sip3_bagmeta, s3_file3_fp))
    archiver1 = BagItArchiver(sip1)
    archiver2 = BagItArchiver(sip2)
    archiver3 = BagItArchiver(sip3)

    # Each archiver subpath follows: '<recid>/r/<ISO-8601-SIP-timestamp>'
    sip1_ts = arrow.get(sip1.model.created).isoformat()
    sip2_ts = arrow.get(sip2.model.created).isoformat()
    sip3_ts = arrow.get(sip3.model.created).isoformat()
    assert archiver1.get_archive_subpath() == '2/r/{0}'.format(sip1_ts)
    assert archiver2.get_archive_subpath() == '2/r/{0}'.format(sip2_ts)
    assert archiver3.get_archive_subpath() == '3/r/{0}'.format(sip3_ts)

    # As a test, write the SIPs in reverse chronological order
    assert not sip1.archived
    assert not sip2.archived
    assert not sip3.archived
    archive_sip.delay(sip3.id)
    archive_sip.delay(sip2.id)
    archive_sip.delay(sip1.id)
    assert sip1.archived
    assert sip2.archived
    assert sip3.archived

    fs1 = archive_fs.opendir(archiver1.get_archive_subpath())
    assert set(fs1.listdir()) == set(['tagmanifest-md5.txt', 'bagit.txt',
                                      'manifest-md5.txt', 'bag-info.txt',
                                      'data'])
    assert set(fs1.listdir('data')) == set(['metadata', 'files',
                                            'filenames.txt'])
    assert fs1.listdir('data/metadata') == ['record-json.json', ]
    assert set(fs1.listdir('data/files')) == set([s1_file1_fn, s1_file2_fn])

    fs2 = archive_fs.opendir(archiver2.get_archive_subpath())
    assert set(fs2.listdir()) == set(['tagmanifest-md5.txt', 'bagit.txt',
                                      'manifest-md5.txt', 'bag-info.txt',
                                      'data', 'fetch.txt'])
    # The second SIP has written only the metadata, so there should be no
    # 'files/' directory, but 'filenames.txt' should still be there because
    # of the fetch.txt
    assert set(fs2.listdir('data')) == set(['metadata', 'filenames.txt'])
    assert fs2.listdir('data/metadata') == ['record-json.json', ]

    with fs2.open('fetch.txt') as fp:
        cnt = fp.read().splitlines()
    # Fetched files should correctly fetch the files from the first archive
    base_uri = archiver1.get_archive_base_uri()
    assert set(cnt) == set([
        '{base}/2/r/{s1ts}/{fn} 4 {fn}'.format(fn=s1_file1_fp, base=base_uri,
                                               s1ts=sip1_ts),
        '{base}/2/r/{s1ts}/{fn} 8 {fn}'.format(fn=s1_file2_fp, base=base_uri,
                                               s1ts=sip1_ts),
    ])

    fs3 = archive_fs.opendir(archiver3.get_archive_subpath())
    assert set(fs3.listdir()) == set(['tagmanifest-md5.txt', 'bagit.txt',
                                      'manifest-md5.txt', 'bag-info.txt',
                                      'data', 'fetch.txt'])
    # Third SIP should write only the extra 'test3.txt' file
    assert set(fs3.listdir('data')) == set(['metadata', 'files',
                                            'filenames.txt'])
    assert fs3.listdir('data/metadata') == ['record-json.json', ]
    assert fs3.listdir('data/files') == [s3_file3_fn, ]
    with fs3.open('fetch.txt') as fp:
        cnt = fp.read().splitlines()
    # Since 'test.txt' was removed in the third SIP, we should only fetch
    # 'test2.txt', also from the first archive, since that is where this
    # file resides physically.
    base_uri = archiver1.get_archive_base_uri()
    assert set(cnt) == set([
        '{base}/2/r/{s1ts}/{fn} 8 {fn}'.format(fn=s3_file2_fp, base=base_uri,
                                               s1ts=sip1_ts),
    ])
    app.config['SIPSTORE_ARCHIVER_WRITING_ENABLED'] = orig
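The fetch.txt assertions above follow the BagIt fetch-file convention of one '<url> <length-in-bytes> <filepath>' entry per line; a small sketch (reusing the '<recid>/r/<ISO-8601-timestamp>' subpath from the assertions; the function names are illustrative) of composing and parsing such entries:

def make_fetch_line(base_uri, recid, sip_timestamp, filepath, size):
    """Compose one BagIt fetch.txt entry: '<url> <length> <filepath>'."""
    url = '{0}/{1}/r/{2}/{3}'.format(base_uri, recid, sip_timestamp, filepath)
    return '{0} {1} {2}'.format(url, size, filepath)


def parse_fetch_line(line):
    """Split a fetch.txt entry back into (url, size, filepath)."""
    url, size, filepath = line.split(' ', 2)
    return url, int(size), filepath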
Ejemplo n.º 26
0
def test_archiving(app, db, deposit, deposit_file, locations, archive_fs):
    """Test ZenodoSIP archiving."""
    # Stash the configuration and enable writing
    orig = app.config['SIPSTORE_ARCHIVER_WRITING_ENABLED']
    app.config['SIPSTORE_ARCHIVER_WRITING_ENABLED'] = True
    deposit.files['test2.txt'] = BytesIO(b'test-two')
    deposit_v1 = publish_and_expunge(db, deposit)
    recid_v1, record_v1 = deposit_v1.fetch_published()
    recid_v1_id = recid_v1.id
    # Record files after publishing: 'test.txt', 'test2.txt'

    sip1 = SIP(SIPModel.query.one())
    sip1_id = sip1.id

    # Edit the metadata
    deposit_v1 = deposit_v1.edit()
    deposit_v1['title'] = "New title"
    deposit_v1 = publish_and_expunge(db, deposit_v1)
    # Record files after publishing: 'test.txt', 'test2.txt'
    sip2_id = SIPModel.query.order_by(SIPModel.created.desc()).first().id

    # Create a new version
    deposit_v1.newversion()
    recid_v1 = PersistentIdentifier.query.get(recid_v1_id)
    pv = PIDVersioning(child=recid_v1)
    depid_v2 = pv.draft_child_deposit
    deposit_v2 = ZenodoDeposit.get_record(depid_v2.object_uuid)
    del deposit_v2.files['test.txt']
    deposit_v2.files['test3.txt'] = BytesIO(b('test-three'))
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    # Record files after publishing: 'test2.txt', 'test3.txt'

    sip1 = SIP(SIPModel.query.get(sip1_id))
    sip2 = SIP(SIPModel.query.get(sip2_id))
    sip3 = SIP(SIPModel.query.order_by(SIPModel.created.desc()).first())

    # Because we are using secure_filename when writing SIPFiles we need to
    # generate the correct names: <SIPFile.id>-<secure_filename>
    s1_file1_fn = '{0}-test.txt'.format(fetch_suff(sip1, 'test.txt').file_id)
    s1_file1_fp = 'data/files/{0}'.format(s1_file1_fn)

    s1_file2_fn = '{0}-test2.txt'.format(fetch_suff(sip1, 'test2.txt').file_id)
    s1_file2_fp = 'data/files/{0}'.format(s1_file2_fn)

    s3_file2_fn = '{0}-test2.txt'.format(fetch_suff(sip3, 'test2.txt').file_id)
    s3_file2_fp = 'data/files/{0}'.format(s3_file2_fn)

    s3_file3_fn = '{0}-test3.txt'.format(fetch_suff(sip3, 'test3.txt').file_id)
    s3_file3_fp = 'data/files/{0}'.format(s3_file3_fn)

    sip1_bagmeta = json.loads(
        next(m.content for m in sip1.metadata
             if m.type.name == 'bagit'))['files']
    sip2_bagmeta = json.loads(
        next(m.content for m in sip2.metadata
             if m.type.name == 'bagit'))['files']
    sip3_bagmeta = json.loads(
        next(m.content for m in sip3.metadata
             if m.type.name == 'bagit'))['files']

    # Check if Bagit metadata contains the correct file-fetching information
    assert set([f['filepath'] for f in sip1_bagmeta]) == \
        set([s1_file1_fp,
             s1_file2_fp,
             'data/filenames.txt',
             'data/metadata/record-json.json', 'bag-info.txt',
             'manifest-md5.txt', 'bagit.txt', 'tagmanifest-md5.txt'])
    assert not BagItArchiver._is_fetched(get_m_item(sip1_bagmeta, s1_file1_fp))
    assert not BagItArchiver._is_fetched(get_m_item(sip1_bagmeta, s1_file2_fp))

    assert set([f['filepath'] for f in sip2_bagmeta]) == \
        set([s1_file1_fp,
             s1_file2_fp,
             'data/filenames.txt',
             'data/metadata/record-json.json', 'bag-info.txt',
             'manifest-md5.txt', 'bagit.txt', 'tagmanifest-md5.txt',
             'fetch.txt'])
    # Both files should be fetched since this is a metadata-only edit
    # submission
    assert BagItArchiver._is_fetched(get_m_item(sip2_bagmeta, s1_file1_fp))
    assert BagItArchiver._is_fetched(get_m_item(sip2_bagmeta, s1_file2_fp))

    assert set([f['filepath'] for f in sip3_bagmeta]) == \
        set([s3_file2_fp,
             s3_file3_fp,
             'data/filenames.txt',
             'data/metadata/record-json.json', 'bag-info.txt',
             'manifest-md5.txt', 'bagit.txt', 'tagmanifest-md5.txt',
             'fetch.txt'])

    # The first file should be fetched from the previous version and the new
    # file should be archived in this bag.
    assert BagItArchiver._is_fetched(get_m_item(sip3_bagmeta, s3_file2_fp))
    assert not BagItArchiver._is_fetched(get_m_item(sip3_bagmeta, s3_file3_fp))
    archiver1 = BagItArchiver(sip1)
    archiver2 = BagItArchiver(sip2)
    archiver3 = BagItArchiver(sip3)

    # Each archiver subpath follows: '<recid>/r/<ISO-8601-SIP-timestamp>'
    sip1_ts = arrow.get(sip1.model.created).isoformat()
    sip2_ts = arrow.get(sip2.model.created).isoformat()
    sip3_ts = arrow.get(sip3.model.created).isoformat()
    assert archiver1.get_archive_subpath() == '2/r/{0}'.format(sip1_ts)
    assert archiver2.get_archive_subpath() == '2/r/{0}'.format(sip2_ts)
    assert archiver3.get_archive_subpath() == '3/r/{0}'.format(sip3_ts)

    # As a test, write the SIPs in reverse chronological order
    assert not sip1.archived
    assert not sip2.archived
    assert not sip3.archived
    archive_sip.delay(sip3.id)
    archive_sip.delay(sip2.id)
    archive_sip.delay(sip1.id)
    assert sip1.archived
    assert sip2.archived
    assert sip3.archived

    fs1 = archive_fs.opendir(archiver1.get_archive_subpath())
    assert set(fs1.listdir()) == set([
        'tagmanifest-md5.txt', 'bagit.txt', 'manifest-md5.txt', 'bag-info.txt',
        'data'
    ])
    assert set(fs1.listdir('data')) == set(
        ['metadata', 'files', 'filenames.txt'])
    assert fs1.listdir('data/metadata') == [
        'record-json.json',
    ]
    assert set(fs1.listdir('data/files')) == set([s1_file1_fn, s1_file2_fn])

    fs2 = archive_fs.opendir(archiver2.get_archive_subpath())
    assert set(fs2.listdir()) == set([
        'tagmanifest-md5.txt', 'bagit.txt', 'manifest-md5.txt', 'bag-info.txt',
        'data', 'fetch.txt'
    ])
    # The second SIP has written only the metadata, so there should be no
    # 'files/' directory, but 'filenames.txt' should still be there because
    # of the fetch.txt
    assert set(fs2.listdir('data')) == set(['metadata', 'filenames.txt'])
    assert fs2.listdir('data/metadata') == [
        'record-json.json',
    ]

    with fs2.open('fetch.txt') as fp:
        cnt = fp.read().splitlines()
    # Fetched files should correctly fetch the files from the first archive
    base_uri = archiver1.get_archive_base_uri()
    assert set(cnt) == set([
        '{base}/2/r/{s1ts}/{fn} 4 {fn}'.format(fn=s1_file1_fp,
                                               base=base_uri,
                                               s1ts=sip1_ts),
        '{base}/2/r/{s1ts}/{fn} 8 {fn}'.format(fn=s1_file2_fp,
                                               base=base_uri,
                                               s1ts=sip1_ts),
    ])

    fs3 = archive_fs.opendir(archiver3.get_archive_subpath())
    assert set(fs3.listdir()) == set([
        'tagmanifest-md5.txt', 'bagit.txt', 'manifest-md5.txt', 'bag-info.txt',
        'data', 'fetch.txt'
    ])
    # Third SIP should write only the extra 'test3.txt' file
    assert set(fs3.listdir('data')) == set(
        ['metadata', 'files', 'filenames.txt'])
    assert fs3.listdir('data/metadata') == [
        'record-json.json',
    ]
    assert fs3.listdir('data/files') == [
        s3_file3_fn,
    ]
    with fs3.open('fetch.txt') as fp:
        cnt = fp.read().splitlines()
    # Since 'test.txt' was removed in the third SIP, we should only fetch
    # 'test2.txt', also from the first archive, since that is where this
    # file resides physically.
    base_uri = archiver1.get_archive_base_uri()
    assert set(cnt) == set([
        '{base}/2/r/{s1ts}/{fn} 8 {fn}'.format(fn=s3_file2_fp,
                                               base=base_uri,
                                               s1ts=sip1_ts),
    ])
    app.config['SIPSTORE_ARCHIVER_WRITING_ENABLED'] = orig
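The subpath assertions in both archiving tests use the '<recid>/r/<ISO-8601-SIP-timestamp>' layout; an illustrative sketch of how such a subpath can be built (BagItArchiver.get_archive_subpath() may compose it differently internally):

import arrow


def archive_subpath(recid_value, sip_created):
    """Build the '<recid>/r/<ISO-8601-SIP-timestamp>' subpath (sketch)."""
    return '{0}/r/{1}'.format(recid_value, arrow.get(sip_created).isoformat())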
Ejemplo n.º 27
0
    def publish(self):
        """Publish GitHub release as record."""
        id_ = uuid.uuid4()
        deposit_metadata = dict(self.metadata)
        deposit = None
        try:
            db.session.begin_nested()
            # TODO: Add filter on Published releases
            previous_releases = self.model.repository.releases.filter_by(
                status=ReleaseStatus.PUBLISHED)
            versioning = None
            stashed_draft_child = None
            if previous_releases.count():
                last_release = previous_releases.order_by(
                        Release.created.desc()).first()
                last_recid = PersistentIdentifier.get(
                    'recid', last_release.record['recid'])
                versioning = PIDVersioning(child=last_recid)
                last_record = ZenodoRecord.get_record(
                    versioning.last_child.object_uuid)
                deposit_metadata['conceptrecid'] = last_record['conceptrecid']
                if 'conceptdoi' not in last_record:
                    last_depid = PersistentIdentifier.get(
                        'depid', last_record['_deposit']['id'])
                    last_deposit = ZenodoDeposit.get_record(
                        last_depid.object_uuid)
                    last_deposit = last_deposit.registerconceptdoi()
                    last_recid, last_record = last_deposit.fetch_published()
                deposit_metadata['conceptdoi'] = last_record['conceptdoi']
                if versioning.draft_child:
                    stashed_draft_child = versioning.draft_child
                    versioning.remove_draft_child()

            deposit = self.deposit_class.create(deposit_metadata, id_=id_)

            deposit['_deposit']['created_by'] = self.event.user_id
            deposit['_deposit']['owners'] = [self.event.user_id]

            # Fetch the deposit files
            for key, url in self.files:
                # Make a HEAD request to get GitHub to compute the
                # Content-Length.
                res = self.gh.api.session.head(url, allow_redirects=True)
                # Now, download the file
                res = self.gh.api.session.get(url, stream=True,
                                              allow_redirects=True)
                if res.status_code != 200:
                    raise Exception(
                        "Could not retrieve archive from GitHub: {url}"
                        .format(url=url)
                    )

                size = int(res.headers.get('Content-Length', 0))
                ObjectVersion.create(
                    bucket=deposit.files.bucket,
                    key=key,
                    stream=res.raw,
                    size=size or None,
                    mimetype=res.headers.get('Content-Type'),
                )

            # GitHub-specific SIP store agent
            sip_agent = {
                '$schema': current_jsonschemas.path_to_url(
                    current_app.config['SIPSTORE_GITHUB_AGENT_JSONSCHEMA']),
                'user_id': self.event.user_id,
                'github_id': self.release['author']['id'],
                'email': self.gh.account.user.email,
            }
            deposit.publish(user_id=self.event.user_id, sip_agent=sip_agent)
            recid_pid, record = deposit.fetch_published()
            self.model.recordmetadata = record.model
            if versioning and stashed_draft_child:
                versioning.insert_draft_child(stashed_draft_child)
            record_id = str(record.id)
            db.session.commit()

            # Send Datacite DOI registration task
            datacite_register.delay(recid_pid.pid_value, record_id)

            # Index the record
            RecordIndexer().index_by_id(record_id)
        except Exception:
            db.session.rollback()
            # Remove deposit from index since it was not committed.
            if deposit and deposit.id:
                try:
                    RecordIndexer().delete(deposit)
                except Exception:
                    current_app.logger.exception(
                        "Failed to remove uncommited deposit from index.")
            raise
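The key versioning detail in publish() above is stashing an open draft before creating the new release deposit and re-inserting it afterwards; a condensed, hypothetical helper distilling that pattern (not part of the project's API):

def create_with_stashed_draft(versioning, create_new_version):
    """Detach an open draft from the version chain while a new version is made.

    Hypothetical sketch of the pattern used in publish(): remove the draft
    child via PIDVersioning, create the new deposit, then re-insert the
    stashed draft.
    """
    stashed = None
    if versioning.draft_child:
        stashed = versioning.draft_child
        versioning.remove_draft_child()
    new_version = create_new_version()
    if stashed:
        versioning.insert_draft_child(stashed)
    return new_version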
Ejemplo n.º 28
0
def test_related_identifiers_serialization(app, db, deposit, deposit_file):
    """Serialize PID Relations to related identifiers."""
    deposit_v1 = publish_and_expunge(db, deposit)
    depid_v1_value = deposit_v1['_deposit']['id']

    recid_v1, record_v1 = deposit_v1.fetch_published()

    deposit_v1.newversion()
    pv = PIDVersioning(child=recid_v1)
    depid_v2 = pv.draft_child_deposit
    deposit_v2 = ZenodoDeposit.get_record(depid_v2.get_assigned_object())
    deposit_v2.files['file.txt'] = BytesIO(b('file1'))
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    deposit_v2 = deposit_v2.edit()
    # Re-publish without any changes (no community requests are made in this
    # test)
    deposit_v2 = publish_and_expunge(db, deposit_v2)
    recid_v2, record_v2 = deposit_v2.fetch_published()
    depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)
    recid_v1, record_v1 = deposit_v1.fetch_published()

    depid_v1, deposit_v1 = deposit_resolver.resolve(depid_v1_value)

    rids = serialize_related_identifiers(recid_v1)
    expected_v1 = [
        {
            'scheme': 'doi',
            'identifier': '10.5072/zenodo.1',
            'relation': 'isPartOf'
        }
        # TODO: serialization of new version relations is disabled
        # {
        #     'scheme': 'doi',
        #     'identifier': '10.5072/zenodo.3',
        #     'relation': 'isPreviousVersionOf'
        # }
    ]
    assert rids == expected_v1

    rids = serialize_related_identifiers(recid_v2)
    expected_v2 = [
        {
            'scheme': 'doi',
            'identifier': '10.5072/zenodo.1',
            'relation': 'isPartOf'
        }
        # TODO: serialization of new version relations is disabled
        # {
        #     'scheme': 'doi',
        #     'identifier': '10.5072/zenodo.2',
        #     'relation': 'isNewVersionOf'
        # }
    ]
    assert rids == expected_v2
    parent_pid = PersistentIdentifier.get('recid', '1')
    rids = serialize_related_identifiers(parent_pid)

    expected_parent = [{
        'relation': 'hasPart',
        'scheme': 'doi',
        'identifier': '10.5072/zenodo.2'
    }, {
        'relation': 'hasPart',
        'scheme': 'doi',
        'identifier': '10.5072/zenodo.3'
    }]
    assert rids == expected_parent
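The expected payloads above pair each DOI with a DataCite relation type; a minimal, illustrative helper (not the project's serializer API) that builds one such entry:

def related_identifier(doi, relation):
    """Build one related-identifier entry in the shape asserted above.

    'isPartOf'/'hasPart' link individual versions to the concept DOI, while
    'isNewVersionOf'/'isPreviousVersionOf' (currently disabled per the TODOs
    above) would link sibling versions.
    """
    return {'scheme': 'doi', 'identifier': doi, 'relation': relation}


# Example: the first version pointing at its concept DOI.
assert related_identifier('10.5072/zenodo.1', 'isPartOf') == {
    'scheme': 'doi', 'identifier': '10.5072/zenodo.1', 'relation': 'isPartOf'}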