def test_write_all_files(sips, archive_fs):
    """Check the directory layout written when the first SIP is exported."""
    bagit = BagItArchiver(sips[0])

    # The archive starts out empty and holds a single top-level entry once
    # the archiver has written its files.
    assert len(archive_fs.listdir()) == 0
    bagit.write_all_files()
    assert len(archive_fs.listdir()) == 1

    bag_fs = archive_fs.opendir(bagit.get_archive_subpath())

    top_level = set(bag_fs.listdir())
    assert top_level == {
        'tagmanifest-md5.txt',
        'bagit.txt',
        'manifest-md5.txt',
        'bag-info.txt',
        'data',
    }

    data_entries = set(bag_fs.listdir('data'))
    assert data_entries == {'metadata', 'files', 'filenames.txt'}

    metadata_entries = set(bag_fs.listdir('data/metadata'))
    assert metadata_entries == {'marcxml-test.xml', 'json-test.json'}

    file_entries = set(bag_fs.listdir('data/files'))
    assert file_entries == {'foobar.txt'}
# Example #2
# 0
def test_archiving(app, db, deposit, deposit_file, locations, archive_fs):
    """Test ZenodoSIP archiving.

    Three SIPs are created from one deposit: the first publication, a
    metadata-only edit, and a new version in which 'test.txt' is deleted and
    'test3.txt' is added.  The test then checks, for each SIP:

    * the file list stored in its 'bagit' metadata and which of those files
      are marked as fetched,
    * the archive subpath reported by ``BagItArchiver``,
    * the files found in ``archive_fs`` after ``archive_sip`` has run,
      including the content of 'fetch.txt'.

    ``deposit_file`` and ``locations`` are requested as fixtures but are not
    referenced in the body.
    NOTE(review): presumably ``deposit_file`` provides the initial 'test.txt'
    -- confirm against the fixture definitions.
    """
    # Stash the configuration and enable writing.  The original value is
    # restored in the ``finally`` clause, so a failing assertion cannot leave
    # archive writing enabled on ``app``.
    orig = app.config['SIPSTORE_ARCHIVER_WRITING_ENABLED']
    app.config['SIPSTORE_ARCHIVER_WRITING_ENABLED'] = True
    try:
        deposit.files['test2.txt'] = BytesIO(b'test-two')
        deposit_v1 = publish_and_expunge(db, deposit)
        recid_v1, _ = deposit_v1.fetch_published()
        # Keep only the ID; the PID object is re-queried further below.
        recid_v1_id = recid_v1.id
        # Record files after publishing: 'test.txt', 'test2.txt'

        sip1 = SIP(SIPModel.query.one())
        sip1_id = sip1.id

        # Edit the metadata
        deposit_v1 = deposit_v1.edit()
        deposit_v1['title'] = "New title"
        deposit_v1 = publish_and_expunge(db, deposit_v1)
        # Record files after publishing: 'test.txt', 'test2.txt'
        sip2_id = SIPModel.query.order_by(SIPModel.created.desc()).first().id

        # Create a new version
        deposit_v1.newversion()
        recid_v1 = PersistentIdentifier.query.get(recid_v1_id)
        pv = PIDVersioning(child=recid_v1)
        depid_v2 = pv.draft_child_deposit
        deposit_v2 = ZenodoDeposit.get_record(depid_v2.object_uuid)
        del deposit_v2.files['test.txt']
        # Plain bytes literal, consistent with 'test2.txt' above.
        deposit_v2.files['test3.txt'] = BytesIO(b'test-three')
        deposit_v2 = publish_and_expunge(db, deposit_v2)
        # Record files after publishing: 'test2.txt', 'test3.txt'

        # Re-load the SIPs by ID now that all three submissions exist.
        sip1 = SIP(SIPModel.query.get(sip1_id))
        sip2 = SIP(SIPModel.query.get(sip2_id))
        sip3 = SIP(SIPModel.query.order_by(SIPModel.created.desc()).first())

        # Because we are using secure_filename when writing SIPFiles we need
        # to generate the correct names: <SIPFile.id>-<secure_filename>
        s1_file1_fn = '{0}-test.txt'.format(
            fetch_suff(sip1, 'test.txt').file_id)
        s1_file1_fp = 'data/files/{0}'.format(s1_file1_fn)

        s1_file2_fn = '{0}-test2.txt'.format(
            fetch_suff(sip1, 'test2.txt').file_id)
        s1_file2_fp = 'data/files/{0}'.format(s1_file2_fn)

        s3_file2_fn = '{0}-test2.txt'.format(
            fetch_suff(sip3, 'test2.txt').file_id)
        s3_file2_fp = 'data/files/{0}'.format(s3_file2_fn)

        s3_file3_fn = '{0}-test3.txt'.format(
            fetch_suff(sip3, 'test3.txt').file_id)
        s3_file3_fp = 'data/files/{0}'.format(s3_file3_fn)

        # The 'files' list of the first metadata entry of type 'bagit'.
        sip1_bagmeta = json.loads(
            next(m.content for m in sip1.metadata
                 if m.type.name == 'bagit'))['files']
        sip2_bagmeta = json.loads(
            next(m.content for m in sip2.metadata
                 if m.type.name == 'bagit'))['files']
        sip3_bagmeta = json.loads(
            next(m.content for m in sip3.metadata
                 if m.type.name == 'bagit'))['files']

        # Check if Bagit metadata contains the correct file-fetching
        # information
        assert set([f['filepath'] for f in sip1_bagmeta]) == \
            set([s1_file1_fp,
                 s1_file2_fp,
                 'data/filenames.txt',
                 'data/metadata/record-json.json', 'bag-info.txt',
                 'manifest-md5.txt', 'bagit.txt', 'tagmanifest-md5.txt'])
        assert not BagItArchiver._is_fetched(
            get_m_item(sip1_bagmeta, s1_file1_fp))
        assert not BagItArchiver._is_fetched(
            get_m_item(sip1_bagmeta, s1_file2_fp))

        assert set([f['filepath'] for f in sip2_bagmeta]) == \
            set([s1_file1_fp,
                 s1_file2_fp,
                 'data/filenames.txt',
                 'data/metadata/record-json.json', 'bag-info.txt',
                 'manifest-md5.txt', 'bagit.txt', 'tagmanifest-md5.txt',
                 'fetch.txt'])
        # Both files should be fetched since it's only metadata-edit
        # submission
        assert BagItArchiver._is_fetched(
            get_m_item(sip2_bagmeta, s1_file1_fp))
        assert BagItArchiver._is_fetched(
            get_m_item(sip2_bagmeta, s1_file2_fp))

        assert set([f['filepath'] for f in sip3_bagmeta]) == \
            set([s3_file2_fp,
                 s3_file3_fp,
                 'data/filenames.txt',
                 'data/metadata/record-json.json', 'bag-info.txt',
                 'manifest-md5.txt', 'bagit.txt', 'tagmanifest-md5.txt',
                 'fetch.txt'])

        # First file should be fetched from previous version and new file
        # should be archived in this bag.
        assert BagItArchiver._is_fetched(
            get_m_item(sip3_bagmeta, s3_file2_fp))
        assert not BagItArchiver._is_fetched(
            get_m_item(sip3_bagmeta, s3_file3_fp))
        archiver1 = BagItArchiver(sip1)
        archiver2 = BagItArchiver(sip2)
        archiver3 = BagItArchiver(sip3)

        # Each archiver subpath follows: '<recid>/r/<ISO-8601-SIP-timestamp>'
        sip1_ts = arrow.get(sip1.model.created).isoformat()
        sip2_ts = arrow.get(sip2.model.created).isoformat()
        sip3_ts = arrow.get(sip3.model.created).isoformat()
        assert archiver1.get_archive_subpath() == '2/r/{0}'.format(sip1_ts)
        assert archiver2.get_archive_subpath() == '2/r/{0}'.format(sip2_ts)
        assert archiver3.get_archive_subpath() == '3/r/{0}'.format(sip3_ts)

        # As a test, write the SIPs in reverse chronological order
        assert not sip1.archived
        assert not sip2.archived
        assert not sip3.archived
        archive_sip.delay(sip3.id)
        archive_sip.delay(sip2.id)
        archive_sip.delay(sip1.id)
        assert sip1.archived
        assert sip2.archived
        assert sip3.archived

        fs1 = archive_fs.opendir(archiver1.get_archive_subpath())
        assert set(fs1.listdir()) == set([
            'tagmanifest-md5.txt', 'bagit.txt', 'manifest-md5.txt',
            'bag-info.txt', 'data'
        ])
        assert set(fs1.listdir('data')) == set(
            ['metadata', 'files', 'filenames.txt'])
        assert fs1.listdir('data/metadata') == [
            'record-json.json',
        ]
        assert set(fs1.listdir('data/files')) == set(
            [s1_file1_fn, s1_file2_fn])

        fs2 = archive_fs.opendir(archiver2.get_archive_subpath())
        assert set(fs2.listdir()) == set([
            'tagmanifest-md5.txt', 'bagit.txt', 'manifest-md5.txt',
            'bag-info.txt', 'data', 'fetch.txt'
        ])
        # Second SIP has written only the metadata, because of that there
        # should be no 'files/', but 'filenames.txt' should still be there
        # because of the fetch.txt
        assert set(fs2.listdir('data')) == set(['metadata', 'filenames.txt'])
        assert fs2.listdir('data/metadata') == [
            'record-json.json',
        ]

        with fs2.open('fetch.txt') as fp:
            cnt = fp.read().splitlines()
        # Fetched files should correctly fetch the files from the first
        # archive
        base_uri = archiver1.get_archive_base_uri()
        assert set(cnt) == set([
            '{base}/2/r/{s1ts}/{fn} 4 {fn}'.format(
                fn=s1_file1_fp, base=base_uri, s1ts=sip1_ts),
            '{base}/2/r/{s1ts}/{fn} 8 {fn}'.format(
                fn=s1_file2_fp, base=base_uri, s1ts=sip1_ts),
        ])

        fs3 = archive_fs.opendir(archiver3.get_archive_subpath())
        assert set(fs3.listdir()) == set([
            'tagmanifest-md5.txt', 'bagit.txt', 'manifest-md5.txt',
            'bag-info.txt', 'data', 'fetch.txt'
        ])
        # Third SIP should write only the extra 'test3.txt' file
        assert set(fs3.listdir('data')) == set(
            ['metadata', 'files', 'filenames.txt'])
        assert fs3.listdir('data/metadata') == [
            'record-json.json',
        ]
        assert fs3.listdir('data/files') == [
            s3_file3_fn,
        ]
        with fs3.open('fetch.txt') as fp:
            cnt = fp.read().splitlines()
        # Since 'test.txt' was removed in third SIP, we should only fetch
        # the 'test2.txt', also from the first archive, since that's where
        # this file resides physically.
        base_uri = archiver1.get_archive_base_uri()
        assert set(cnt) == set([
            '{base}/2/r/{s1ts}/{fn} 8 {fn}'.format(
                fn=s3_file2_fp, base=base_uri, s1ts=sip1_ts),
        ])
    finally:
        app.config['SIPSTORE_ARCHIVER_WRITING_ENABLED'] = orig
def test_write_patched(mocker, sips, archive_fs,
                       secure_sipfile_name_formatter):
    """Test BagIt archiving where bags are written as patches of earlier SIPs.

    SIP-1 is written on its own, SIP-2 as a patch of SIP-1, SIP-3 as a patch
    of SIP-2 and SIP-5 as a patch of SIP-3 (the last two with
    ``include_all_previous=True``).  Afterwards the directory layout of each
    bag and the content of every expected file are checked.

    ``secure_sipfile_name_formatter`` is requested as a fixture but is not
    referenced in the body.
    """
    # Pin the bagging date so the 'Bagging-Date' tag is known in advance
    dt = datetime.utcnow().strftime("%Y-%m-%d %H:%M:%S.%f")
    bagging_date_target = (
        'invenio_sipstore.archivers.bagit_archiver.BagItArchiver.'
        '_generate_bagging_date')
    mocker.patch(bagging_date_target, return_value=dt)

    # NOTE: We take only SIP-1, SIP-2, SIP-3 and SIP-5.
    # Enumeration of related objects follows the "sips" fixture naming
    archivers = []
    for sip, options in (
            (sips[0], {}),
            (sips[1], {'patch_of': sips[0]}),
            (sips[2], {'patch_of': sips[1], 'include_all_previous': True}),
            (sips[4], {'patch_of': sips[2], 'include_all_previous': True})):
        archiver = BagItArchiver(sip, **options)
        archiver.write_all_files()
        archivers.append(archiver)

    fs1, fs2, fs3, fs5 = [
        archive_fs.opendir(archiver.get_archive_subpath())
        for archiver in archivers
    ]

    # Every bag except the first one also includes 'fetch.txt'
    for bag_fs, entry_count in ((fs1, 5), (fs2, 6), (fs3, 6), (fs5, 6)):
        assert len(bag_fs.listdir()) == entry_count

    # Check SIP-1,2,3 data contents: one new file and two metadata files each
    for bag_fs in (fs1, fs2, fs3):
        assert set(bag_fs.listdir('data')) == \
            {'files', 'metadata', 'filenames.txt'}
        assert len(bag_fs.listdir('data/files')) == 1
        assert len(bag_fs.listdir('data/metadata')) == 2

    # SIP-5 data contents: a single metadata file and no 'files' directory
    assert set(fs5.listdir('data')) == {'metadata', 'filenames.txt'}
    assert len(fs5.listdir('data/metadata')) == 1

    # Fetch the filenames for easier fixture formatting below
    file1_fn = '{0}-foobar.txt'.format(
        fetch_file_endswith(sips[0], 'foobar.txt').file_id)
    file2_fn = '{0}-foobar2.txt'.format(
        fetch_file_endswith(sips[1], 'foobar2.txt').file_id)
    file3_fn = '{0}-foobar3.txt'.format(
        fetch_file_endswith(sips[2], 'foobar3.txt').file_id)
    file2_rn_fn = '{0}-foobar2-renamed.txt'.format(
        fetch_file_endswith(sips[2], 'foobar2-renamed.txt').file_id)

    # Both file2_fn and file2_rn_fn are referring to the same FileInstance,
    # so their UUID prefix should match
    assert file2_fn[:36] == file2_rn_fn[:36]

    marcxml_fp = 'data/metadata/marcxml-test.xml'
    json_fp = 'data/metadata/json-test.json'
    filenames_fp = 'data/filenames.txt'
    bagit_txt = 'BagIt-Version: 0.97\nTag-File-Character-Encoding: UTF-8'

    def payload(filename):
        """Return the path of ``filename`` below 'data/files/'."""
        return 'data/files/{0}'.format(filename)

    def manifest_line(bag_fs, path):
        """Format the manifest entry for ``path`` as read from ``bag_fs``."""
        return "{checksum} {filepath}".format(**_read_file(bag_fs, path))

    def renamed_manifest_line(bag_fs, path, new_filename):
        """Format a manifest entry listed under ``new_filename``."""
        return "{checksum} data/files/{newfilename}".format(
            newfilename=new_filename, **_read_file(bag_fs, path))

    def fetch_line(bag_fs, stored_fn, size, target_fn):
        """Format a 'fetch.txt' entry: source path, size and destination."""
        return "{0} {1} {2}".format(
            bag_fs.getsyspath(payload(stored_fn)), size, payload(target_fn))

    def bag_info(payload_oxum, sip):
        """Build the expected 'bag-info.txt' text for ``sip``."""
        return '\n'.join([
            "Source-Organization: European Organization for Nuclear Research",
            "Organization-Address: CERN, CH-1211 Geneva 23, Switzerland",
            "Bagging-Date: {0}".format(dt),
            "Payload-Oxum: {0}".format(payload_oxum),
            "External-Identifier: {0}/SIPBagIt-v1.0.0".format(sip.id),
            "External-Description: BagIt archive of SIP.",
        ])

    names_line1 = '{0} foobar.txt'.format(file1_fn)
    names_line2 = '{0} foobar2.txt'.format(file2_fn)
    names_line3 = '{0} foobar3.txt'.format(file3_fn)

    expected_sip1 = [
        (payload(file1_fn), 'test'),
        (marcxml_fp, '<p>XML 1</p>'),
        (json_fp, '{"title": "JSON 1"}'),
        ('bagit.txt', bagit_txt),
        ('manifest-md5.txt', {
            manifest_line(fs1, payload(file1_fn)),
            manifest_line(fs1, marcxml_fp),
            manifest_line(fs1, json_fp),
            manifest_line(fs1, filenames_fp),
        }),
        (filenames_fp, {names_line1}),
        ('bag-info.txt', bag_info('93.4', sips[0])),
    ]

    expected_sip2 = [
        (payload(file2_fn), 'test-second'),
        (marcxml_fp, '<p>XML 2</p>'),
        (json_fp, '{"title": "JSON 2"}'),
        ('bagit.txt', bagit_txt),
        ('fetch.txt', {
            fetch_line(fs1, file1_fn, 4, file1_fn),
        }),
        ('manifest-md5.txt', {
            manifest_line(fs1, payload(file1_fn)),
            manifest_line(fs2, payload(file2_fn)),
            manifest_line(fs2, marcxml_fp),
            manifest_line(fs2, json_fp),
            manifest_line(fs2, filenames_fp),
        }),
        (filenames_fp, {names_line1, names_line2}),
        ('bag-info.txt', bag_info('165.5', sips[1])),
    ]

    expected_sip3 = [
        (payload(file3_fn), 'test-third'),
        (marcxml_fp, '<p>XML 3</p>'),
        (json_fp, '{"title": "JSON 3"}'),
        ('bagit.txt', bagit_txt),
        ('fetch.txt', {
            fetch_line(fs1, file1_fn, 4, file1_fn),
            # Explanation on entry below: The file is fetched using original
            # filename (file2_fn) as it will be archived in SIP-2, however
            # the new destination has the 'renamed' filename (file2_rn_fn).
            # This is correct and expected behaviour
            fetch_line(fs2, file2_fn, 11, file2_rn_fn),
        }),
        ('manifest-md5.txt', {
            manifest_line(fs1, payload(file1_fn)),
            # Manifest also specifies the renamed filename for File-2
            renamed_manifest_line(fs2, payload(file2_fn), file2_rn_fn),
            manifest_line(fs3, payload(file3_fn)),
            manifest_line(fs3, marcxml_fp),
            manifest_line(fs3, json_fp),
            manifest_line(fs3, filenames_fp),
        }),
        (filenames_fp, {names_line1, names_line2, names_line3}),
        ('bag-info.txt', bag_info('236.6', sips[2])),
    ]

    expected_sip5 = [
        (marcxml_fp, '<p>XML 5 Meta Only</p>'),
        ('bagit.txt', bagit_txt),
        ('fetch.txt', {
            fetch_line(fs1, file1_fn, 4, file1_fn),
            # As in "expected_sip3" above, the file is fetched using original
            # filename (file2_fn) as it will be archived in SIP-2, however
            # the new destination has the 'renamed' filename (file2_rn_fn).
            # This is correct and expected behaviour
            fetch_line(fs2, file2_fn, 11, file2_rn_fn),
            fetch_line(fs3, file3_fn, 10, file3_fn),
        }),
        ('manifest-md5.txt', {
            manifest_line(fs1, payload(file1_fn)),
            # Manifest also specifies the renamed filename for File-2
            renamed_manifest_line(fs2, payload(file2_fn), file2_rn_fn),
            manifest_line(fs3, payload(file3_fn)),
            manifest_line(fs5, marcxml_fp),
            manifest_line(fs5, filenames_fp),
        }),
        (filenames_fp, {names_line1, names_line2, names_line3}),
        ('bag-info.txt', bag_info('227.5', sips[4])),
    ]

    # Set-valued expectations are compared line by line, ignoring order;
    # string-valued expectations must match the whole file content.
    for bag_fs, expectations in ((fs1, expected_sip1),
                                 (fs2, expected_sip2),
                                 (fs3, expected_sip3),
                                 (fs5, expected_sip5)):
        for path, wanted in expectations:
            with bag_fs.open(path) as fp:
                raw = fp.read()
            actual = set(raw.splitlines()) if isinstance(wanted, set) else raw
            assert actual == wanted
# Example #4
# 0
def test_archiving(app, db, deposit, deposit_file, locations, archive_fs):
    """Test ZenodoSIP archiving.

    Three SIPs are created from one deposit: the first publication, a
    metadata-only edit, and a new version in which 'test.txt' is deleted and
    'test3.txt' is added.  The test then checks, for each SIP:

    * the file list stored in its 'bagit' metadata and which of those files
      are marked as fetched,
    * the archive subpath reported by ``BagItArchiver``,
    * the files found in ``archive_fs`` after ``archive_sip`` has run,
      including the content of 'fetch.txt'.

    ``deposit_file`` and ``locations`` are requested as fixtures but are not
    referenced in the body.
    NOTE(review): presumably ``deposit_file`` provides the initial 'test.txt'
    -- confirm against the fixture definitions.
    """
    # Stash the configuration and enable writing.  The original value is
    # restored in the ``finally`` clause, so a failing assertion cannot leave
    # archive writing enabled on ``app``.
    orig = app.config['SIPSTORE_ARCHIVER_WRITING_ENABLED']
    app.config['SIPSTORE_ARCHIVER_WRITING_ENABLED'] = True
    try:
        deposit.files['test2.txt'] = BytesIO(b'test-two')
        deposit_v1 = publish_and_expunge(db, deposit)
        recid_v1, _ = deposit_v1.fetch_published()
        # Keep only the ID; the PID object is re-queried further below.
        recid_v1_id = recid_v1.id
        # Record files after publishing: 'test.txt', 'test2.txt'

        sip1 = SIP(SIPModel.query.one())
        sip1_id = sip1.id

        # Edit the metadata
        deposit_v1 = deposit_v1.edit()
        deposit_v1['title'] = "New title"
        deposit_v1 = publish_and_expunge(db, deposit_v1)
        # Record files after publishing: 'test.txt', 'test2.txt'
        sip2_id = SIPModel.query.order_by(SIPModel.created.desc()).first().id

        # Create a new version
        deposit_v1.newversion()
        recid_v1 = PersistentIdentifier.query.get(recid_v1_id)
        pv = PIDVersioning(child=recid_v1)
        depid_v2 = pv.draft_child_deposit
        deposit_v2 = ZenodoDeposit.get_record(depid_v2.object_uuid)
        del deposit_v2.files['test.txt']
        # Plain bytes literal, consistent with 'test2.txt' above.
        deposit_v2.files['test3.txt'] = BytesIO(b'test-three')
        deposit_v2 = publish_and_expunge(db, deposit_v2)
        # Record files after publishing: 'test2.txt', 'test3.txt'

        # Re-load the SIPs by ID now that all three submissions exist.
        sip1 = SIP(SIPModel.query.get(sip1_id))
        sip2 = SIP(SIPModel.query.get(sip2_id))
        sip3 = SIP(SIPModel.query.order_by(SIPModel.created.desc()).first())

        # Because we are using secure_filename when writing SIPFiles we need
        # to generate the correct names: <SIPFile.id>-<secure_filename>
        s1_file1_fn = '{0}-test.txt'.format(
            fetch_suff(sip1, 'test.txt').file_id)
        s1_file1_fp = 'data/files/{0}'.format(s1_file1_fn)

        s1_file2_fn = '{0}-test2.txt'.format(
            fetch_suff(sip1, 'test2.txt').file_id)
        s1_file2_fp = 'data/files/{0}'.format(s1_file2_fn)

        s3_file2_fn = '{0}-test2.txt'.format(
            fetch_suff(sip3, 'test2.txt').file_id)
        s3_file2_fp = 'data/files/{0}'.format(s3_file2_fn)

        s3_file3_fn = '{0}-test3.txt'.format(
            fetch_suff(sip3, 'test3.txt').file_id)
        s3_file3_fp = 'data/files/{0}'.format(s3_file3_fn)

        # The 'files' list of the first metadata entry of type 'bagit'.
        sip1_bagmeta = json.loads(
            next(m.content for m in sip1.metadata
                 if m.type.name == 'bagit'))['files']
        sip2_bagmeta = json.loads(
            next(m.content for m in sip2.metadata
                 if m.type.name == 'bagit'))['files']
        sip3_bagmeta = json.loads(
            next(m.content for m in sip3.metadata
                 if m.type.name == 'bagit'))['files']

        # Check if Bagit metadata contains the correct file-fetching
        # information
        assert set([f['filepath'] for f in sip1_bagmeta]) == \
            set([s1_file1_fp,
                 s1_file2_fp,
                 'data/filenames.txt',
                 'data/metadata/record-json.json', 'bag-info.txt',
                 'manifest-md5.txt', 'bagit.txt', 'tagmanifest-md5.txt'])
        assert not BagItArchiver._is_fetched(
            get_m_item(sip1_bagmeta, s1_file1_fp))
        assert not BagItArchiver._is_fetched(
            get_m_item(sip1_bagmeta, s1_file2_fp))

        assert set([f['filepath'] for f in sip2_bagmeta]) == \
            set([s1_file1_fp,
                 s1_file2_fp,
                 'data/filenames.txt',
                 'data/metadata/record-json.json', 'bag-info.txt',
                 'manifest-md5.txt', 'bagit.txt', 'tagmanifest-md5.txt',
                 'fetch.txt'])
        # Both files should be fetched since it's only metadata-edit
        # submission
        assert BagItArchiver._is_fetched(
            get_m_item(sip2_bagmeta, s1_file1_fp))
        assert BagItArchiver._is_fetched(
            get_m_item(sip2_bagmeta, s1_file2_fp))

        assert set([f['filepath'] for f in sip3_bagmeta]) == \
            set([s3_file2_fp,
                 s3_file3_fp,
                 'data/filenames.txt',
                 'data/metadata/record-json.json', 'bag-info.txt',
                 'manifest-md5.txt', 'bagit.txt', 'tagmanifest-md5.txt',
                 'fetch.txt'])

        # First file should be fetched from previous version and new file
        # should be archived in this bag.
        assert BagItArchiver._is_fetched(
            get_m_item(sip3_bagmeta, s3_file2_fp))
        assert not BagItArchiver._is_fetched(
            get_m_item(sip3_bagmeta, s3_file3_fp))
        archiver1 = BagItArchiver(sip1)
        archiver2 = BagItArchiver(sip2)
        archiver3 = BagItArchiver(sip3)

        # Each archiver subpath follows: '<recid>/r/<ISO-8601-SIP-timestamp>'
        sip1_ts = arrow.get(sip1.model.created).isoformat()
        sip2_ts = arrow.get(sip2.model.created).isoformat()
        sip3_ts = arrow.get(sip3.model.created).isoformat()
        assert archiver1.get_archive_subpath() == '2/r/{0}'.format(sip1_ts)
        assert archiver2.get_archive_subpath() == '2/r/{0}'.format(sip2_ts)
        assert archiver3.get_archive_subpath() == '3/r/{0}'.format(sip3_ts)

        # As a test, write the SIPs in reverse chronological order
        assert not sip1.archived
        assert not sip2.archived
        assert not sip3.archived
        archive_sip.delay(sip3.id)
        archive_sip.delay(sip2.id)
        archive_sip.delay(sip1.id)
        assert sip1.archived
        assert sip2.archived
        assert sip3.archived

        fs1 = archive_fs.opendir(archiver1.get_archive_subpath())
        assert set(fs1.listdir()) == set(['tagmanifest-md5.txt', 'bagit.txt',
                                          'manifest-md5.txt', 'bag-info.txt',
                                          'data'])
        assert set(fs1.listdir('data')) == set(['metadata', 'files',
                                                'filenames.txt'])
        assert fs1.listdir('data/metadata') == ['record-json.json', ]
        assert set(fs1.listdir('data/files')) == set(
            [s1_file1_fn, s1_file2_fn])

        fs2 = archive_fs.opendir(archiver2.get_archive_subpath())
        assert set(fs2.listdir()) == set(['tagmanifest-md5.txt', 'bagit.txt',
                                          'manifest-md5.txt', 'bag-info.txt',
                                          'data', 'fetch.txt'])
        # Second SIP has written only the metadata, because of that there
        # should be no 'files/', but 'filenames.txt' should still be there
        # because of the fetch.txt
        assert set(fs2.listdir('data')) == set(['metadata', 'filenames.txt'])
        assert fs2.listdir('data/metadata') == ['record-json.json', ]

        with fs2.open('fetch.txt') as fp:
            cnt = fp.read().splitlines()
        # Fetched files should correctly fetch the files from the first
        # archive
        base_uri = archiver1.get_archive_base_uri()
        assert set(cnt) == set([
            '{base}/2/r/{s1ts}/{fn} 4 {fn}'.format(
                fn=s1_file1_fp, base=base_uri, s1ts=sip1_ts),
            '{base}/2/r/{s1ts}/{fn} 8 {fn}'.format(
                fn=s1_file2_fp, base=base_uri, s1ts=sip1_ts),
        ])

        fs3 = archive_fs.opendir(archiver3.get_archive_subpath())
        assert set(fs3.listdir()) == set(['tagmanifest-md5.txt', 'bagit.txt',
                                          'manifest-md5.txt', 'bag-info.txt',
                                          'data', 'fetch.txt'])
        # Third SIP should write only the extra 'test3.txt' file
        assert set(fs3.listdir('data')) == set(['metadata', 'files',
                                                'filenames.txt'])
        assert fs3.listdir('data/metadata') == ['record-json.json', ]
        assert fs3.listdir('data/files') == [s3_file3_fn, ]
        with fs3.open('fetch.txt') as fp:
            cnt = fp.read().splitlines()
        # Since 'test.txt' was removed in third SIP, we should only fetch
        # the 'test2.txt', also from the first archive, since that's where
        # this file resides physically.
        base_uri = archiver1.get_archive_base_uri()
        assert set(cnt) == set([
            '{base}/2/r/{s1ts}/{fn} 8 {fn}'.format(
                fn=s3_file2_fp, base=base_uri, s1ts=sip1_ts),
        ])
    finally:
        app.config['SIPSTORE_ARCHIVER_WRITING_ENABLED'] = orig