def test_balsa_pipeline1(ind, topurl, outd, clonedir):
    list(initiate_dataset(
        template="balsa",
        dataset_name='dataladtest-WG33',
        path=outd,
        data_fields=['dataset_id'])({'dataset_id': 'WG33'}))

    with chpwd(outd):
        pipeline = ofpipeline('WG33', url=topurl)
        out = run_pipeline(pipeline)
    eq_(len(out), 1)

    repo = AnnexRepo(outd, create=False)  # to be used in the checks
    # Inspect the tree -- that we have all the branches
    branches = {'master', 'incoming', 'incoming-processed', 'git-annex'}
    eq_(set(repo.get_branches()), branches)
    # since now we base incoming on master -- and there was nothing custom
    # in master after incoming-processed, both branches should be the same
    eq_(repo.get_hexsha('master'), repo.get_hexsha('incoming-processed'))
    # but that one is different from incoming
    assert_not_equal(repo.get_hexsha('incoming'),
                     repo.get_hexsha('incoming-processed'))

    get_branch_commits = repo.get_branch_commits_ \
        if hasattr(repo, 'get_branch_commits_') else repo.get_branch_commits
    commits = {b: list(get_branch_commits(b)) for b in branches}
    # all commits out there -- init ds + init crawler + 1*(incoming, processed)
    # The number of commits in master differs based on the create variant used
    # (the variant on DataLad's master makes only one commit).
    ncommits_master = len(commits["master"])
    assert_in(ncommits_master, [4, 5])
    # incoming branches from master but lacks one merge commit.
    eq_(len(commits['incoming']), ncommits_master - 1)
    # incoming-processed is on master.
    eq_(len(commits['incoming-processed']), ncommits_master)

    with chpwd(outd):
        eq_(set(glob('*')), {'dir1', 'file1.nii'})
        all_files = sorted(find_files('.'))

    fpath = opj(outd, 'file1.nii')
    ok_file_has_content(fpath, "content of file1.nii")
    ok_file_under_git(fpath, annexed=True)
    fpath2 = opj(outd, 'dir1', 'file2.nii')
    ok_file_has_content(fpath2, "content of file2.nii")
    ok_file_under_git(fpath2, annexed=True)

    target_files = {
        './.datalad/crawl/crawl.cfg',
        './.datalad/crawl/statuses/incoming.json',
        './.datalad/meta/balsa.json',
        './.datalad/config',
        './file1.nii',
        './dir1/file2.nii',
    }
    eq_(set(all_files), target_files)
def test_balsa_pipeline1(ind, topurl, outd, clonedir):
    list(initiate_dataset(
        template="balsa",
        dataset_name='dataladtest-WG33',
        path=outd,
        data_fields=['dataset_id'])({'dataset_id': 'WG33'}))

    with chpwd(outd):
        pipeline = ofpipeline('WG33', url=topurl)
        out = run_pipeline(pipeline)
    eq_(len(out), 1)

    repo = AnnexRepo(outd, create=False)  # to be used in the checks
    # Inspect the tree -- that we have all the branches
    branches = {'master', 'incoming', 'incoming-processed', 'git-annex'}
    eq_(set(repo.get_branches()), branches)
    assert_not_equal(repo.get_hexsha('master'),
                     repo.get_hexsha('incoming-processed'))
    # and that one is different from incoming
    assert_not_equal(repo.get_hexsha('incoming'),
                     repo.get_hexsha('incoming-processed'))

    commits = {b: list(repo.get_branch_commits(b)) for b in branches}
    eq_(len(commits['incoming']), 1)
    eq_(len(commits['incoming-processed']), 2)
    # all commits out there -- init ds + init crawler + 1*(incoming, processed, merge)
    eq_(len(commits['master']), 6)

    with chpwd(outd):
        eq_(set(glob('*')), {'dir1', 'file1.nii'})
        all_files = sorted(find_files('.'))

    fpath = opj(outd, 'file1.nii')
    ok_file_has_content(fpath, "content of file1.nii")
    ok_file_under_git(fpath, annexed=True)
    fpath2 = opj(outd, 'dir1', 'file2.nii')
    ok_file_has_content(fpath2, "content of file2.nii")
    ok_file_under_git(fpath2, annexed=True)

    target_files = {
        './.datalad/crawl/crawl.cfg',
        './.datalad/crawl/statuses/incoming.json',
        './.datalad/meta/balsa.json',
        './.datalad/config',
        './file1.nii',
        './dir1/file2.nii',
    }
    eq_(set(all_files), target_files)
def test_gh1426(origin_path, target_path):
    # set up a pair of repos, one the published copy of the other
    origin = create(origin_path)
    target = AnnexRepo(target_path, create=True)
    target.config.set(
        'receive.denyCurrentBranch', 'updateInstead', where='local')
    origin.siblings('add', name='target', url=target_path)
    origin.publish(to='target')
    ok_clean_git(origin.path)
    ok_clean_git(target.path)
    eq_(origin.repo.get_hexsha(), target.get_hexsha())

    # gist of #1426 is that a newly added subdataset does not cause the
    # superdataset to get published
    origin.create('sub')
    ok_clean_git(origin.path)
    assert_not_equal(origin.repo.get_hexsha(), target.get_hexsha())
    # now push
    res = origin.publish(to='target')
    assert_result_count(res, 1)
    assert_result_count(
        res, 1, status='ok', type='dataset', path=origin.path)
    eq_(origin.repo.get_hexsha(), target.get_hexsha())
def test_openfmri_pipeline2(ind, topurl, outd):
    # no versioned files -- should still work! ;)
    list(initiate_dataset(
        template="openfmri",
        dataset_name='dataladtest-ds666',
        path=outd,
        data_fields=['dataset'])({'dataset': 'ds666'}))

    with chpwd(outd):
        pipeline = ofpipeline('ds666', versioned_urls=False, topurl=topurl)
        out = run_pipeline(pipeline)
    eq_(len(out), 1)

    repo = AnnexRepo(outd, create=False)  # to be used in the checks
    # Inspect the tree -- that we have all the branches
    branches = {'master', 'incoming', 'incoming-processed', 'git-annex'}
    eq_(set(repo.get_branches()), branches)
    # We do not have custom changes in master yet, so it just follows
    # incoming-processed atm
    # eq_(repo.get_hexsha('master'), repo.get_hexsha('incoming-processed'))
    # Since we did initiate_dataset -- now we have a separate master!
    assert_not_equal(repo.get_hexsha('master'),
                     repo.get_hexsha('incoming-processed'))
    # and that one is different from incoming
    assert_not_equal(repo.get_hexsha('incoming'),
                     repo.get_hexsha('incoming-processed'))

    # actually the tree should look quite neat with the 1.0.0 tag having 1
    # parent in incoming, and 1.0.1 having 1.0.0 and the 2nd commit in
    # incoming as parents
    commits_hexsha = {b: list(_get_branch_commits(repo, b)) for b in branches}
    commits_l = {
        b: list(_get_branch_commits(repo, b, limit='left-only'))
        for b in branches
    }

    # all commits out there:
    # backend set, dataset init, crawler init, incoming (shares with master - 1),
    # (2 or 3 commits, depending on the create variant)
    # incoming-processed, merge, aggregate metadata:
    ncommits_master = len(commits_hexsha['master'])
    assert_in(ncommits_master, [5, 6])
    assert_in(len(commits_l['master']), [4, 5])

    eq_(len(commits_hexsha['incoming']), ncommits_master - 2)
    eq_(len(commits_l['incoming']), ncommits_master - 2)
    eq_(len(commits_hexsha['incoming-processed']), ncommits_master - 1)
    # TODO inspect by knowledgeable person and re-enable
    #eq_(len(commits_l['incoming-processed']), ncommits_master - 2)

    # rerun pipeline -- make sure we are on the same in all branches!
    with chpwd(outd):
        out = run_pipeline(pipeline)
    eq_(len(out), 1)

    commits_hexsha_ = {b: list(_get_branch_commits(repo, b)) for b in branches}
    eq_(commits_hexsha, commits_hexsha_)  # i.e. nothing new
    eq_(out[0]['datalad_stats'], ActivityStats(files=2, skipped=2, urls=2))
    eq_(out[0]['datalad_stats'], out[0]['datalad_stats'].get_total())

    os.rename(opj(ind, 'ds666', 'ds666_R2.0.0.tar.gz'),
              opj(ind, 'ds666', 'ds666.tar.gz'))

    with chpwd(outd):
        out = run_pipeline(pipeline)
    eq_(len(out), 1)
    eq_(out[0]['datalad_stats'], ActivityStats())  # was committed
    stats_total = out[0]['datalad_stats'].get_total()
    stats_total.downloaded_size = 0
    eq_(stats_total,
        ActivityStats(files=4, overwritten=1, skipped=1, downloaded=1,
                      merges=[['incoming', 'incoming-processed']],
                      versions=['1.0.0'],
                      renamed=1, urls=2, add_annex=2))
    # in reality there is also a 1.0.0+1 tag since the file changed but
    # carries no version suffix
    eq_(repo.get_tags(output='name'), ['1.0.0', '1.0.0+1'])

    check_dropall_get(repo)
def test_openfmri_pipeline1(ind, topurl, outd, clonedir):
    index_html = opj(ind, 'ds666', 'index.html')

    list(initiate_dataset(
        template="openfmri",
        dataset_name='dataladtest-ds666',
        path=outd,
        data_fields=['dataset'])({'dataset': 'ds666'}))

    repo = AnnexRepo(outd, create=False)  # to be used in the checks
    # Since datalad 0.11.2 all .metadata/objects go under annex.
    # Here we have a test where we force drop all annexed content;
    # to mitigate that, let's place all metadata under git
    dotdatalad_attributes_file = opj('.datalad', '.gitattributes')
    repo.set_gitattributes(
        [('metadata/objects/**', {'annex.largefiles': '(nothing)'})],
        dotdatalad_attributes_file)
    # --amend so we do not cause a change in the # of commits below
    repo.commit("gitattributes",
                files=dotdatalad_attributes_file,
                options=['--amend'])

    with chpwd(outd):
        pipeline = ofpipeline('ds666', versioned_urls=False, topurl=topurl)
        out = run_pipeline(pipeline)
    eq_(len(out), 1)

    # Inspect the tree -- that we have all the branches
    branches = {'master', 'incoming', 'incoming-processed', 'git-annex'}
    eq_(set(repo.get_branches()), branches)
    # We do not have custom changes in master yet, so it just follows
    # incoming-processed atm
    # eq_(repo.get_hexsha('master'), repo.get_hexsha('incoming-processed'))
    # Since we did initiate_dataset -- now we have a separate master!
    assert_not_equal(repo.get_hexsha('master'),
                     repo.get_hexsha('incoming-processed'))
    # and that one is different from incoming
    assert_not_equal(repo.get_hexsha('incoming'),
                     repo.get_hexsha('incoming-processed'))

    t1w_fpath_nover = opj(outd, 'sub1', 'anat', 'sub-1_T1w.dat')
    ok_file_has_content(t1w_fpath_nover, "mighty load in old format")

    #
    # And now versioned files were specified!
    #
    add_to_index(index_html, content=_versioned_files)

    with chpwd(outd):
        pipeline = ofpipeline('ds666', versioned_urls=False, topurl=topurl)
        out = run_pipeline(pipeline)
    eq_(len(out), 1)

    ok_(not exists(t1w_fpath_nover),
        "%s file should no longer be there if unversioned files get removed correctly"
        % t1w_fpath_nover)
    repo = AnnexRepo(outd, create=False)  # to be used in the checks
    # Inspect the tree -- that we have all the branches
    branches = {'master', 'incoming', 'incoming-processed', 'git-annex'}
    eq_(set(repo.get_branches()), branches)
    # We do not have custom changes in master yet, so it just follows
    # incoming-processed atm
    # eq_(repo.get_hexsha('master'), repo.get_hexsha('incoming-processed'))
    # Since we did initiate_dataset -- now we have a separate master!
    assert_not_equal(repo.get_hexsha('master'),
                     repo.get_hexsha('incoming-processed'))
    # and that one is different from incoming
    assert_not_equal(repo.get_hexsha('incoming'),
                     repo.get_hexsha('incoming-processed'))

    # actually the tree should look quite neat with the 1.0.0 tag having 1
    # parent in incoming, and 1.0.1 having 1.0.0 and the 2nd commit in
    # incoming as parents
    commits_hexsha = {b: list(_get_branch_commits(repo, b)) for b in branches}
    commits_l = {
        b: list(_get_branch_commits(repo, b, limit='left-only'))
        for b in branches
    }

    # all commits out there:
    # dataset init, crawler init  (2 commits)
    # + 3*(incoming, processed, merge)
    # + 3*aggregate-metadata update
    # - 1 since now that incoming starts with master, there is one less merge
    # In --incremental mode there is a side effect of the now absent
    # 2*remove of obsolete metadata object files,
    # see https://github.com/datalad/datalad/issues/2772
    # TODO inspect by knowledgeable person and re-enable
    #ncommits_master = len(commits_hexsha['master'])
    #assert_in(ncommits_master, [13, 14])
    #assert_in(len(commits_l['master']), [8, 9])

    # TODO inspect by knowledgeable person and re-enable
    #eq_(len(commits_hexsha['incoming']), ncommits_master - 8)
    #eq_(len(commits_l['incoming']), ncommits_master - 8)
    #eq_(len(commits_hexsha['incoming-processed']), ncommits_master - 5)
    #eq_(len(commits_l['incoming-processed']), ncommits_master - 8)

    # Check tags for the versions
    eq_(out[0]['datalad_stats'].get_total().versions, ['1.0.0', '1.0.1'])
    # +1 because original "release" was assumed to be 1.0.0
    repo_tags = repo.get_tags()
    eq_(repo.get_tags(output='name'), ['1.0.0', '1.0.0+1', '1.0.1'])

    # Ben: The tagged ones currently are the ones with the message
    # '[DATALAD] dataset aggregate metadata update\n':
    #eq_(repo_tags[0]['hexsha'], commits_l['master'][4])  # next to the last one
    #eq_(repo_tags[-1]['hexsha'], commits_l['master'][0])  # the last one

    def hexsha(l):
        return l.__class__(x.hexsha for x in l)

    # TODO requires additional tooling to re-enable
    ## Verify that we have the desired tree of merges
    #eq_(hexsha(commits_l['incoming-processed'][0].parents),
    #    (commits_l['incoming-processed'][1],
    #     commits_l['incoming'][0]))
    #eq_(hexsha(commits_l['incoming-processed'][2].parents),
    #    (commits_l['incoming-processed'][3],  # also in master
    #     commits_l['incoming'][2],))

    # ben: The following two comparisons are targeting these commits:
    # commit "Merge branch 'incoming-processed'\n" in commits_l['master'],
    # parents are:
    # commit "[DATALAD] dataset aggregate metadata update\n" in commits_l['master'] and
    # commit "[DATALAD] Added files from extracted archives\n\nFiles processed: 6\n renamed: 2\n +annex: 3\nBranches merged: incoming->incoming-processed\n" in commits_l['incoming-processed']
    # TODO requires additional tooling to re-enable
    #eq_(hexsha(commits_l['master'][1].parents),
    #    (commits_l['master'][2],
    #     commits_l['incoming-processed'][0]))
    #eq_(hexsha(commits_l['master'][3].parents),
    #    (commits_l['master'][4],
    #     commits_l['incoming-processed'][1]))

    with chpwd(outd):
        eq_(set(glob('*')), {'changelog.txt', 'sub-1'})
        all_files = sorted(find_files('.'))

    t1w_fpath = opj(outd, 'sub-1', 'anat', 'sub-1_T1w.dat')
    ok_file_has_content(t1w_fpath, "mighty load 1.0.1")
    ok_file_under_git(opj(outd, 'changelog.txt'), annexed=False)
    ok_file_under_git(t1w_fpath, annexed=True)

    try:
        # this is the new way
        from datalad.metadata.metadata import get_ds_aggregate_db_locations
        ds = Dataset('.')
        dbloc, objbase = get_ds_aggregate_db_locations(ds)
        dbloc = op.relpath(dbloc, start=ds.path)
    except ImportError:
        # this stopped working in early 2019 versions of datalad
        from datalad.metadata.metadata import agginfo_relpath
        dbloc = agginfo_relpath

    target_files = {
        './.datalad/config',
        './.datalad/crawl/crawl.cfg',
        # no more!
        # './.datalad/config.ttl', './.datalad/datalad.ttl',
        './.datalad/crawl/statuses/incoming.json',
        './.datalad/crawl/versions/incoming.json',
        './changelog.txt',
        './sub-1/anat/sub-1_T1w.dat',
        './sub-1/beh/responses.tsv',
        './' + dbloc,
    }
    target_incoming_files = {
        '.gitattributes',  # we marked the default backend right in incoming
        # we now base 'incoming' on the master branch, so we get all those as well
        '.datalad/.gitattributes',
        '.datalad/config',
        '.datalad/crawl/crawl.cfg',
        'changelog.txt',
        'ds666.tar.gz',
        'ds666-beh_R1.0.1.tar.gz',
        'ds666_R1.0.0.tar.gz',
        'ds666_R1.0.1.tar.gz',
        'ds666_R2.0.0.tar.gz',
        '.datalad/crawl/statuses/incoming.json',
        '.datalad/crawl/versions/incoming.json'
    }
    # Ben: metadata object file names may differ since they contain some
    # checksum-ish bits ...
    # TODO: Check how those names are constructed and maybe at least count the
    # number of created object files in addition to this comparison
    eq_(set([f for f in all_files
             if not f.startswith('./.datalad/metadata/objects/')]),
        target_files)

    # check that -beh was committed in the 2nd commit in incoming, not the first one
    assert_not_in('ds666-beh_R1.0.1.tar.gz',
                  repo.get_files(commits_l['incoming'][-1]))
    assert_in('ds666-beh_R1.0.1.tar.gz',
              repo.get_files(commits_l['incoming'][0]))

    # rerun pipeline -- make sure we are on the same in all branches!
    with chpwd(outd):
        out = run_pipeline(pipeline)
    eq_(len(out), 1)

    commits_hexsha_ = {b: list(_get_branch_commits(repo, b)) for b in branches}
    eq_(commits_hexsha, commits_hexsha_)  # i.e. nothing new
    # actually we do manage to add_git 1 (README) since it is generated and
    # committed directly to git
    # BUT now fixed -- if not committed (was the same), it should be marked as skipped
    # Nothing was committed, so stats leaked all the way up
    eq_(out[0]['datalad_stats'], ActivityStats(files=5, skipped=5, urls=5))
    eq_(out[0]['datalad_stats'], out[0]['datalad_stats'].get_total())

    # rerun pipeline when new content is available
    # add a new revision, rerun the pipeline and check that stuff was
    # processed/added correctly
    add_to_index(
        index_html,
        content='<a href="ds666_R2.0.0.tar.gz">Raw data on AWS version 2.0.0</a>')

    with chpwd(outd):
        out = run_pipeline(pipeline)
        all_files_updated = sorted(find_files('.'))
    eq_(len(out), 1)
    assert_not_equal(out[0]['datalad_stats'].get_total(), ActivityStats())
    # there are no overlays ATM, so behav would be gone since there is no 2.0.0 for it!
    target_files.remove('./sub-1/beh/responses.tsv')
    # Ben: metadata object file names may differ since they contain some
    # checksum-ish bits ...
    # TODO: Check how those names are constructed and maybe at least count the
    # number of created object files in addition to this comparison
    eq_(set([f for f in all_files_updated
             if not f.startswith('./.datalad/metadata/objects/')]),
        target_files)

    # new instance so it re-reads git stuff etc
    # repo = AnnexRepo(outd, create=False)  # to be used in the checks
    commits_hexsha_ = {b: list(_get_branch_commits(repo, b)) for b in branches}
    commits_l_ = {
        b: list(_get_branch_commits(repo, b, limit='left-only'))
        for b in branches
    }
    assert_not_equal(commits_hexsha, commits_hexsha_)
    eq_(out[0]['datalad_stats'], ActivityStats())  # commit happened so stats were consumed
    # numbers seem to be right
    total_stats = out[0]['datalad_stats'].get_total()
    # but for some reason downloaded_size fluctuates.... why? probably archiving...?
    total_stats.downloaded_size = 0
    eq_(total_stats,
        ActivityStats(
            files=8, skipped=5, downloaded=1, renamed=1, urls=6,
            add_annex=2,
            # add_git=1,  # README
            versions=['2.0.0'],
            merges=[['incoming', 'incoming-processed']]))

    check_dropall_get(repo)

    # Let's see if the pipeline would remove files we stopped tracking
    remove_from_index(index_html, '<a href=.ds666_R1.0.0[^<]*</a>')

    with chpwd(outd):
        with swallow_logs(new_level=logging.WARNING) as cml:
            out = run_pipeline(pipeline)
            # since files get removed in incoming, but reprocessed completely
            # into incoming-processed and merged into master -- new commits
            # will come.  They shouldn't have any difference but still should
            # be new commits
            assert_in("There is already a tag 2.0.0 in the repository", cml.out)
    eq_(len(out), 1)

    incoming_files = repo.get_files('incoming')
    target_incoming_files.remove('ds666_R1.0.0.tar.gz')
    eq_(set(incoming_files), target_incoming_files)
    commits_hexsha_removed = {
        b: list(_get_branch_commits(repo, b))
        for b in branches
    }
    # our 'statuses' database should have recorded the change, thus got a diff
    # which propagated through all branches
    for b in 'master', 'incoming-processed':
        # with a non-persistent DB we had no changes
        # eq_(repo.repo.branches[b].commit.diff(commits_hexsha_[b][0]), [])
        assert_in(repo.pathobj / '.datalad/crawl/statuses/incoming.json',
                  repo.diff(b, commits_hexsha_[b][0]))
    dincoming = repo.diff('incoming', commits_hexsha_['incoming'][0])
    eq_(len(dincoming), 2)  # 2 diff objects -- 1 file removed, 1 statuses updated
    eq_(set(dincoming.keys()),
        {repo.pathobj / '.datalad/crawl/statuses/incoming.json',
         repo.pathobj / 'ds666_R1.0.0.tar.gz'})
    eq_(out[0]['datalad_stats'].get_total().removed, 1)
    assert_not_equal(commits_hexsha_, commits_hexsha_removed)

    # we will check if a clone would crawl just as well
    from datalad.api import crawl

    # make a brand new clone
    GitRepo.clone(outd, clonedir)

    def _pipeline(*args, **kwargs):
        """Helper to mock openfmri.pipeline invocation so it looks at our 'server'"""
        kwargs = updated(kwargs, {'topurl': topurl, 'versioned_urls': False})
        return ofpipeline(*args, **kwargs)

    with chpwd(clonedir), patch.object(openfmri, 'pipeline', _pipeline):
        output, stats = crawl()  # we should be able to recrawl without doing anything
        ok_(stats, ActivityStats(files=6, skipped=6, urls=5))
def test_check_dates(path):
    refdate = 1218182889

    with set_date(refdate - 1):
        ar = AnnexRepo(path, create=True)

        def tag_object(tag):
            """Return object for tag.  Do not dereference it.
            """
            # We can't use ar.get_tags because that returns the commit's hexsha,
            # not the tag's, and ar.get_hexsha is limited to commit objects.
            return ar.call_git_oneline(
                ["rev-parse", "refs/tags/{}".format(tag)], read_only=True)

        ar.add("foo")
        ar.commit("add foo")
        foo_commit = ar.get_hexsha()
        ar.commit("add foo")
        ar.tag("foo-tag", "tag before refdate")
        foo_tag = tag_object("foo-tag")
        # Make a lightweight tag to make sure `tag_dates` doesn't choke on it.
        ar.tag("light")
    with set_date(refdate + 1):
        ar.add("bar")
        ar.commit("add bar")
        bar_commit = ar.get_hexsha()
        ar.tag("bar-tag", "tag after refdate")
        bar_tag = tag_object("bar-tag")

    with set_date(refdate + 2):
        # Drop an annexed file so that we have more blobs in the git-annex
        # branch than its current tree.
        ar.drop("bar", options=["--force"])

    results = {}
    for which in ["older", "newer"]:
        result = check_dates(ar, refdate, which=which)["objects"]
        ok_(result)
        if which == "newer":
            assert_in(bar_commit, result)
            assert_not_in(foo_commit, result)
            assert_in(bar_tag, result)
        elif which == "older":
            assert_in(foo_commit, result)
            assert_not_in(bar_commit, result)
            assert_in(foo_tag, result)
        results[which] = result

    ok_(any(x.get("filename") == "uuid.log"
            for x in results["older"].values()))

    newer_tree = check_dates(ar, refdate, annex="tree")["objects"]

    def is_annex_log_blob(entry):
        return (entry["type"] == "annex-blob"
                and entry["filename"].endswith(".log"))

    def num_logs(entries):
        return sum(map(is_annex_log_blob, entries.values()))

    # Because we dropped bar above, we should have one more blob in the
    # git-annex branch than in the current tree of the git-annex branch.
    eq_(num_logs(results["newer"]) - num_logs(newer_tree), 1)

    # Act like today is one day from the reference timestamp to check that we
    # get the same results with the one-day-back default.
    seconds_in_day = 60 * 60 * 24
    with patch('time.time', return_value=refdate + seconds_in_day):
        assert_equal(check_dates(ar, annex="tree")["objects"], newer_tree)

    # We can give a path (str) instead of a GitRepo object.
    assert_equal(check_dates(path, refdate, annex="tree")["objects"],
                 newer_tree)

    with assert_raises(ValueError):
        check_dates(ar, refdate, which="unrecognized")
def test_check_dates(path):
    refdate = 1218182889

    with set_date(refdate - 1):
        ar = AnnexRepo(path, create=True)
        ar.add("foo")
        ar.commit("add foo")
        foo_commit = ar.get_hexsha()
        ar.commit("add foo")
        ar.tag("foo-tag", "tag before refdate")
        # We can't use ar.get_tags because that returns the commit's hexsha,
        # not the tag's, and ar.get_hexsha is limited to commit objects.
        foo_tag = ar.repo.git.rev_parse("foo-tag")
        # Make a lightweight tag to make sure `tag_dates` doesn't choke on it.
        ar.tag("light")
    with set_date(refdate + 1):
        ar.add("bar")
        ar.commit("add bar")
        bar_commit = ar.get_hexsha()
        ar.tag("bar-tag", "tag after refdate")
        bar_tag = ar.repo.git.rev_parse("bar-tag")

    with set_date(refdate + 2):
        # Drop an annexed file so that we have more blobs in the git-annex
        # branch than its current tree.
        ar.drop("bar", options=["--force"])

    results = {}
    for which in ["older", "newer"]:
        result = check_dates(ar, refdate, which=which)["objects"]
        ok_(result)
        if which == "newer":
            assert_in(bar_commit, result)
            assert_not_in(foo_commit, result)
            assert_in(bar_tag, result)
        elif which == "older":
            assert_in(foo_commit, result)
            assert_not_in(bar_commit, result)
            assert_in(foo_tag, result)
        results[which] = result

    ok_(any(x.get("filename") == "uuid.log"
            for x in results["older"].values()))

    newer_tree = check_dates(ar, refdate, annex="tree")["objects"]

    def is_annex_log_blob(entry):
        return (entry["type"] == "annex-blob"
                and entry["filename"].endswith(".log"))

    def num_logs(entries):
        return sum(map(is_annex_log_blob, entries.values()))

    # Because we dropped bar above, we should have one more blob in the
    # git-annex branch than in the current tree of the git-annex branch.
    eq_(num_logs(results["newer"]) - num_logs(newer_tree), 1)

    # Act like today is one day from the reference timestamp to check that we
    # get the same results with the one-day-back default.
    seconds_in_day = 60 * 60 * 24
    with patch('time.time', return_value=refdate + seconds_in_day):
        assert_equal(check_dates(ar, annex="tree")["objects"], newer_tree)

    # We can give a path (str) instead of a GitRepo object.
    assert_equal(check_dates(path, refdate, annex="tree")["objects"],
                 newer_tree)

    with assert_raises(ValueError):
        check_dates(ar, refdate, which="unrecognized")