Beispiel #1
0
def test_push_fetch_records(server_instance, backend):
    """Push ``master`` and ``testbranch`` to a remote through the CLI.

    Inside an isolated filesystem the test initializes a repository, commits
    ten samples plus two metadata entries on ``master``, ten further samples
    plus one metadata entry on ``testbranch``, registers ``server_instance``
    as the ``origin`` remote and asserts that ``cli.push`` exits with code 0
    for both branches.

    Parameters
    ----------
    server_instance
        Value passed to ``repo.remote.add`` as the address of ``origin``.
    backend
        Forwarded as ``backend_opts`` when the ``dummy`` arrayset is created.
    """
    runner = CliRunner()
    with runner.isolated_filesystem():
        repo = Repository(getcwd(), exists=False)
        try:
            repo.init('foo', 'bar')
            # a single buffer is overwritten in place for every sample written
            dummyData = np.arange(50)
            co1 = repo.checkout(write=True, branch='master')
            co1.arraysets.init_arrayset(
                name='dummy', prototype=dummyData, named_samples=True, backend_opts=backend)
            for idx in range(10):
                dummyData[:] = idx
                co1.arraysets['dummy'][str(idx)] = dummyData
            co1.metadata['hello'] = 'world'
            co1.metadata['somemetadatakey'] = 'somemetadatavalue'
            co1.commit('first commit adding dummy data and hello meta')
            co1.close()

            repo.create_branch('testbranch')
            co2 = repo.checkout(write=True, branch='testbranch')
            for idx in range(10, 20):
                dummyData[:] = idx
                co2.arraysets['dummy'][str(idx)] = dummyData
            co2.metadata['foo'] = 'bar'
            co2.commit('first commit on test branch adding non-conflict data and meta')
            co2.close()

            repo.remote.add('origin', server_instance)

            res = runner.invoke(cli.push, ['origin', 'master'], obj=repo)
            assert res.exit_code == 0
            res = runner.invoke(cli.push, ['origin', 'testbranch'], obj=repo)
            assert res.exit_code == 0
        finally:
            # close the repository environments before the isolated
            # filesystem is removed, even when an assertion above fails
            repo._env._close_environments()
Beispiel #2
0
def test_push_fetch_records(server_instance, backend):
    """Check that ``cli.push`` succeeds for ``master`` and ``testbranch``.

    A repository is created in an isolated filesystem with an ndarray column
    named ``dummy``. Samples ``'0'``..``'9'`` are committed on ``master`` and
    samples ``'10'``..``'19'`` on ``testbranch`` before both branches are
    pushed to the ``origin`` remote located at ``server_instance``.
    """
    cli_runner = CliRunner()
    with cli_runner.isolated_filesystem():
        repo = Repository(getcwd(), exists=False)
        try:
            repo.init('foo', 'bar')
            sample = np.arange(50)

            # --- master: create the column and write the first ten samples
            master_co = repo.checkout(write=True, branch='master')
            master_co.add_ndarray_column(
                name='dummy', prototype=sample, backend=backend)
            for number in range(10):
                sample.fill(number)
                master_co.columns['dummy'][str(number)] = sample
            master_co.commit('first commit adding dummy data')
            master_co.close()

            # --- testbranch: ten more samples under non-conflicting keys
            repo.create_branch('testbranch')
            branch_co = repo.checkout(write=True, branch='testbranch')
            for number in range(10, 20):
                sample.fill(number)
                branch_co.columns['dummy'][str(number)] = sample
            branch_co.commit(
                'first commit on test branch adding non-conflict data')
            branch_co.close()

            repo.remote.add('origin', server_instance)

            for branch_name in ('master', 'testbranch'):
                outcome = cli_runner.invoke(
                    cli.push, ['origin', branch_name], obj=repo)
                assert outcome.exit_code == 0
        finally:
            repo._env._close_environments()
Beispiel #3
0
def test_branch_create_and_delete(written_two_cmt_server_repo):
    """Drive the branch create / remove / list CLI commands against a clone.

    The fixture supplies a server address and a source repository. The test
    clones from the server into an isolated filesystem, creates and deletes
    ``testbranch``, then checks that a branch carrying its own commit
    (``secondtest``) can only be removed with ``-f``.
    """
    server, base_repo = written_two_cmt_server_repo

    # commit hash of a write checkout on the source repository; the CLI
    # output for ``testbranch`` is compared against it below
    source_co = base_repo.checkout(write=True)
    head_digest = source_co.commit_hash
    source_co.close()

    cli_runner = CliRunner()
    with cli_runner.isolated_filesystem():
        clone_dir = getcwd()
        cloned = Repository(clone_dir, exists=False)
        try:
            clone_args = [
                '--name', 'Foo Tester', '--email', '*****@*****.**', f'{server}'
            ]
            out = cli_runner.invoke(cli.clone, clone_args, obj=cloned)
            assert out.exit_code == 0

            # create a branch, then confirm it is listed
            out = cli_runner.invoke(cli.branch_create, ['testbranch'], obj=cloned)
            assert out.exit_code == 0
            assert out.stdout == f"Created BRANCH: testbranch HEAD: {head_digest}\n"
            assert cloned.list_branches() == ['master', 'origin/master', 'testbranch']

            # remove it again, then confirm it is gone
            out = cli_runner.invoke(cli.branch_remove, ['testbranch'], obj=cloned)
            assert out.exit_code == 0
            assert out.stdout == f"Deleted BRANCH: testbranch HEAD: {head_digest}\n"
            assert cloned.list_branches() == ['master', 'origin/master']

            # give a second branch a commit of its own
            cloned.create_branch('secondtest')
            writer = cloned.checkout(write=True, branch='secondtest')
            writer.add_str_column('test_meta')
            second_digest = writer.commit('dummy commit')
            writer.close()

            # re-open with staging set to master so we can try to delete secondtest
            writer = cloned.checkout(write=True, branch='master')
            writer.close()

            # plain removal is refused ...
            out = cli_runner.invoke(cli.branch_remove, ['secondtest'], obj=cloned)
            assert out.exit_code == 1

            # ... while the forced removal succeeds
            out = cli_runner.invoke(cli.branch_remove, ['secondtest', '-f'], obj=cloned)
            assert out.exit_code == 0
            assert out.stdout == f"Deleted BRANCH: secondtest HEAD: {second_digest}\n"

            out = cli_runner.invoke(cli.branch_list, obj=cloned)
            assert out.exit_code == 0
            assert out.stdout == "['master', 'origin/master']\n"
        finally:
            cloned._env._close_environments()
Beispiel #4
0
def test_checkout_writer_branch_works(dummy_repo: Repository):
    """``cli.checkout`` moves the staging head to ``dev`` and frees the lock."""
    from hangar.records.heads import get_staging_branch_head

    dummy_repo.create_branch('dev')

    outcome = CliRunner().invoke(cli.checkout, ['dev'], obj=dummy_repo)
    assert outcome.exit_code == 0
    assert outcome.stdout == 'Writer checkout head set to branch: dev\n'

    # the branch records must now name ``dev`` as the staging branch
    staged_branch = get_staging_branch_head(dummy_repo._env.branchenv)
    assert staged_branch == 'dev'
    assert dummy_repo.writer_lock_held is False
Beispiel #5
0
def test_cannot_operate_without_repo_init(managed_tmpdir):
    """Every repository operation raises ``RuntimeError`` before ``init``.

    A ``Repository`` is constructed with ``exists=False`` on
    ``managed_tmpdir`` and never initialized. Each method, remote operation
    and property listed below must raise ``RuntimeError``; afterwards the
    environment must still report that the repository is not initialized.
    """
    repo = Repository(path=managed_tmpdir, exists=False)

    # Each operation is deferred in a lambda so that it only executes inside
    # the ``pytest.raises`` block. ``writer_lock_held`` is a property, so it is
    # checked once by plain attribute access rather than being called.
    operations = [
        # methods
        ('checkout', lambda: repo.checkout()),
        ('log', lambda: repo.log()),
        ('summary', lambda: repo.summary()),
        ('merge', lambda: repo.merge('fail', 'master', 'nonexistant')),
        ('create_branch', lambda: repo.create_branch('test')),
        ('list_branches', lambda: repo.list_branches()),
        ('force_release_writer_lock', lambda: repo.force_release_writer_lock()),
        # remote operations
        ('remote.add', lambda: repo.remote.add('origin', 'foo')),
        ('remote.remove', lambda: repo.remote.remove('origin')),
        ('remote.fetch', lambda: repo.remote.fetch('origin', 'master')),
        ('remote.fetch_data', lambda: repo.remote.fetch_data('origin', branch='master')),
        ('remote.list_all', lambda: repo.remote.list_all()),
        ('remote.ping', lambda: repo.remote.ping('origin')),
        ('remote.push', lambda: repo.remote.push('origin', 'master')),
        ('remove_branch', lambda: repo.remove_branch('master')),
        # properties
        ('path', lambda: repo.path),
        ('version', lambda: repo.version),
        ('writer_lock_held', lambda: repo.writer_lock_held),
        ('size_human', lambda: repo.size_human),
        ('size_nbytes', lambda: repo.size_nbytes),
    ]

    for label, operation in operations:
        with pytest.raises(RuntimeError):
            operation()
            # only reached when nothing was raised; name the offending call
            pytest.fail(f'{label} did not raise RuntimeError before init')

    assert repo._env.repo_is_initialized is False
Beispiel #6
0
def branch_create(repo: Repository, name, startpoint):
    """Create a branch with NAME at STARTPOINT (short-digest or branch)

    If no STARTPOINT is provided, the new branch is positioned at the HEAD of
    the staging area branch, automatically.
    """
    # NOTE: the docstring above is left untouched on purpose; click uses a
    # command's docstring as its help text.
    from hangar.records.commiting import expand_short_commit_digest
    from hangar.records.heads import get_branch_head_commit
    from hangar.records.heads import get_staging_branch_head

    branch_names = repo.list_branches()
    if name in branch_names:
        # ClickException takes the message text itself
        raise click.ClickException(f'branch name: {name} already exists')

    try:
        if startpoint is None:
            # default: head commit of the branch currently set for staging
            branch = get_staging_branch_head(repo._env.branchenv)
            base_commit = get_branch_head_commit(repo._env.branchenv, branch)
        elif startpoint in branch_names:
            base_commit = get_branch_head_commit(repo._env.branchenv, startpoint)
        else:
            # anything that is not a known branch name is treated as a
            # (possibly shortened) commit digest
            base_commit = expand_short_commit_digest(repo._env.refenv, startpoint)

        res = repo.create_branch(name, base_commit=base_commit)
    except (KeyError, ValueError, RuntimeError) as e:
        # report as a CLI error; keep the original exception as the cause
        raise click.ClickException(str(e)) from e

    click.echo(f'Created BRANCH: {res.name} HEAD: {res.digest}')
Beispiel #7
0
def branch_create(ctx, name, startpoint):
    """Create a branch with NAME at STARTPOINT (short-digest or branch)

    If no STARTPOINT is provided, the new branch is positioned at the HEAD of
    the staging area branch, automatically.
    """
    from hangar.records.heads import get_branch_head_commit, get_staging_branch_head

    # the repository is always the one in the current working directory
    repo = Repository(path=os.getcwd())
    existing = repo.list_branches()
    if name in existing:
        raise ValueError(f'branch name: {name} already exists')

    if startpoint is not None and startpoint not in existing:
        # not a branch name: resolve it as a (short) commit digest
        base_commit = expand_short_commit_digest(repo._env.refenv, startpoint)
    else:
        if startpoint is None:
            # no start point given: use the staging area branch
            source_branch = get_staging_branch_head(repo._env.branchenv)
        else:
            source_branch = startpoint
        base_commit = get_branch_head_commit(repo._env.branchenv, source_branch)

    created = repo.create_branch(name, base_commit=base_commit)
    click.echo('BRANCH: ' + created + f' HEAD: {base_commit}')
Beispiel #8
0
def test_checkout_writer_branch_lock_held_errors(dummy_repo: Repository):
    """``cli.checkout`` fails and changes nothing while a writer is open."""
    from hangar.records.heads import get_staging_branch_head

    dummy_repo.create_branch('testbranch')
    # an open write-enabled checkout keeps the writer lock for the CLI call
    writer = dummy_repo.checkout(write=True, branch='master')
    try:
        outcome = CliRunner().invoke(cli.checkout, ['testbranch'], obj=dummy_repo)
        assert outcome.exit_code == 1
        output = outcome.stdout
        assert output.startswith('Error: Cannot acquire the writer lock.') is True

        # staging branch, lock state and the open checkout are all unchanged
        staged_branch = get_staging_branch_head(dummy_repo._env.branchenv)
        assert staged_branch == 'master'
        assert dummy_repo.writer_lock_held is True
        assert writer.branch_name == 'master'
    finally:
        writer.close()
    assert dummy_repo.writer_lock_held is False
Beispiel #9
0
def branch(l, b):
    # List the repository's branch names when ``l`` is truthy and/or create a
    # branch named ``b`` when ``b`` is truthy. The repository is the one
    # located in the current working directory.
    #
    # (Kept as comments rather than a docstring: a docstring on a click
    # command would become its help text.)
    if not (l or b):
        # nothing requested: do not touch the repository at all
        return

    # build the repository once and share it between both actions instead of
    # constructing a second instance for the same path
    repo = Repository(path=os.getcwd())
    if l:
        click.echo(repo.list_branch_names())
    if b:
        succ = repo.create_branch(b)
        click.echo(f'create branch operation success: {succ}')
    def test_server_push_two_branch_then_clone_fetch_data_options(
            self, two_branch_multi_commit_repo_class, managed_tmpdir_class, array5by7_class,
            fetchBranch, fetchCommit, fetchAsetns, fetchNbytes, fetchAll_history, tmp_path_factory):
        """Clone a two-branch remote and verify ``remote.fetch_data`` options.

        The fixture tuple provides the pushed branch, the history of that
        branch, the per-commit expected samples, the master history and the
        server address. The test clones into a fresh directory, fetches the
        branch, calls ``fetch_data`` with the parametrized options and then
        checks for every returned commit that the readable samples match the
        expected ones and, when ``fetchNbytes`` is set, that the amount of
        readable data does not exceed it.

        Combining ``fetchAll_history is True`` with an integer ``fetchNbytes``
        is expected to raise ``ValueError``.
        """
        from hangar import Repository
        from operator import eq

        def _stored_size(value):
            """Size in bytes of one sample: array ``nbytes``, else encoded/raw length."""
            if isinstance(value, str):
                # count encoded bytes, as the all-columns check always did
                return len(value.encode())
            nbytes = getattr(value, 'nbytes', None)
            return len(value) if nbytes is None else nbytes

        def _check_and_measure(column, key, expected, matches):
            """Compare ``column[key]`` to ``expected``; return its size, or 0 if unreadable."""
            try:
                value = column[key]
            except FileNotFoundError:
                # data for this sample was not part of the (size limited) fetch
                return 0
            assert matches(expected, value)
            return _stored_size(value)

        branch, branchHist, devCmts, masterHist, server_instance = two_branch_multi_commit_repo_class

        # Clone test (master branch)
        new_tmpdir = str(tmp_path_factory.mktemp('newclone', numbered=True))
        newRepo = Repository(path=new_tmpdir, exists=False)
        try:
            newRepo.clone('Test User', '*****@*****.**', server_instance, remove_old=True)
            newRepo.remote.fetch('origin', branch=branch.name)
            newRepo.create_branch('testbranch', base_commit=branchHist['head'])
            assert newRepo.list_branches() == [
                'master', 'origin/master', f'origin/{branch.name}', branch.name]

            # ---------------- format arguments depending on options ----------------

            kwargs = {
                'column_names': fetchAsetns,
                'max_num_bytes': fetchNbytes,
                'retrieve_all_history': fetchAll_history,
            }
            if fetchBranch is not None:
                hist = branchHist if fetchBranch == 'testbranch' else masterHist
                kwargs['branch'] = fetchBranch
                kwargs['commit'] = None
            else:
                # The original compared ``fetchBranch`` (None here) to 'br',
                # which can never match, so the master history was always
                # selected. NOTE(review): ``fetchCommit`` is never read;
                # possibly it was meant to select the history here - confirm.
                hist = masterHist
                kwargs['branch'] = None
                kwargs['commit'] = hist['head']

            if fetchAll_history is True:
                commits_to_check = hist['order']
            else:
                commits_to_check = [hist['head']]

            # --------------------- retrieve data with desired options --------------

            # This case should fail
            if (fetchAll_history is True) and isinstance(fetchNbytes, int):
                with pytest.raises(ValueError):
                    newRepo.remote.fetch_data(remote='origin', **kwargs)
                # plain return: a test function should not return a value
                return

            # get data
            fetch_commits = newRepo.remote.fetch_data(remote='origin', **kwargs)
            assert commits_to_check == fetch_commits

            # ----------- check that you got everything you expected ----------------

            for fCmt in fetch_commits:
                co = newRepo.checkout(commit=fCmt)
                try:
                    assert co.commit_hash == fCmt
                    ds1List, ds2List, ds3List, ds4List = devCmts[fCmt]

                    if isinstance(fetchAsetns, tuple):
                        # when we are checking one aset only
                        selected = fetchAsetns[0]
                        expected_by_column = {
                            'writtenaset': (ds1List, np.allclose),
                            '_two': (ds2List, np.allclose),
                            'str_col': (ds3List, eq),
                        }
                        # any other name is compared against the fourth list
                        expected, matches = expected_by_column.get(selected, (ds4List, eq))
                        checks = [(co.columns[selected], expected, matches)]
                    else:
                        # compare all asets at the same time
                        checks = [
                            (co.columns['writtenaset'], ds1List, np.allclose),
                            (co.columns['_two'], ds2List, np.allclose),
                            (co.columns['str_col'], ds3List, eq),
                            (co.columns['bytes_col'], ds4List, eq),
                        ]

                    totalSeen = 0
                    expected_lists = [expected for _, expected, _ in checks]
                    # zip stops at the shortest expected list, as before
                    for idx, expected_row in enumerate(zip(*expected_lists)):
                        key = str(idx)
                        for (column, _, matches), expected in zip(checks, expected_row):
                            if fetchNbytes is None:
                                # no size limit: every sample must be readable
                                assert matches(expected, column[key])
                            else:
                                totalSeen += _check_and_measure(column, key, expected, matches)
                        if fetchNbytes is not None:
                            assert totalSeen <= fetchNbytes
                finally:
                    co.close()
        finally:
            # always close the clone's environments, also on assertion failure
            newRepo._env._close_environments()
Beispiel #11
0
def test_server_push_two_branch_then_clone_fetch_data_options(
        server_instance, repo, managed_tmpdir, array5by7, nMasterCommits,
        nMasterSamples, nDevCommits, nDevSamples, fetchBranch, fetchCommit,
        fetchAsetns, fetchNbytes, fetchAll_history):
    """Push two branches, clone them and verify ``remote.fetch_data`` options.

    Setup writes ``nMasterCommits`` commits on ``master`` and ``nDevCommits``
    commits on ``testbranch`` (random float32 samples in the arraysets
    ``writtenaset`` and ``_two``) and pushes both branches to
    ``server_instance``. The repository is then cloned into
    ``<managed_tmpdir>/new``, ``fetch_data`` is called with the parametrized
    options, and every returned commit is checked: readable samples must
    match what was written and, when ``fetchNbytes`` is set, the amount of
    readable data must not exceed it.

    Combining ``fetchAll_history is True`` with an integer ``fetchNbytes`` is
    expected to raise ``ValueError``.
    """
    from hangar import Repository
    from hangar.records.summarize import list_history

    def _commit_random_samples(co, n_samples, message):
        """Rewrite both arraysets with random samples, commit and close ``co``.

        Returns the commit digest and the two lists of written arrays.
        """
        first, second = [], []
        try:
            with co.arraysets['writtenaset'] as d, co.arraysets['_two'] as dd:
                # drop all but the first previously stored key; iterate over
                # a copy because the arraysets are modified in the loop
                for prevKey in list(d.keys())[1:]:
                    d.remove(prevKey)
                    dd.remove(prevKey)

                for sIdx in range(n_samples):
                    arr1 = np.random.randn(*array5by7.shape).astype(
                        np.float32) * 100
                    d[str(sIdx)] = arr1
                    first.append(arr1)
                    arr2 = np.random.randn(20).astype(np.float32)
                    dd[str(sIdx)] = arr2
                    second.append(arr2)
            cmt = co.commit(message)
        finally:
            co.close()
        return cmt, (first, second)

    def _check_and_measure(column, key, expected):
        """Compare ``column[key]`` to ``expected``; return its nbytes, or 0 if unreadable."""
        try:
            value = column[key]
        except FileNotFoundError:
            # data for this sample was not part of the (size limited) fetch
            return 0
        assert np.allclose(expected, value)
        return value.nbytes

    # Push master branch test
    masterCmts = {}
    co = repo.checkout(write=True)
    co.arraysets.init_arrayset(name='writtenaset',
                               shape=(5, 7),
                               dtype=np.float32)
    co.arraysets.init_arrayset(name='_two', shape=(20), dtype=np.float32)
    for cIdx in range(nMasterCommits):
        if cIdx != 0:
            # the first commit reuses the checkout that created the arraysets
            co = repo.checkout(write=True)
        cmt, samples = _commit_random_samples(
            co, nMasterSamples, f'master commit number: {cIdx}')
        masterCmts[cmt] = samples

    repo.remote.add('origin', server_instance)
    push1 = repo.remote.push('origin', 'master')
    assert push1 == 'master'
    masterHist = list_history(repo._env.refenv,
                              repo._env.branchenv,
                              branch_name='master')

    # Push dev branch test
    devCmts = masterCmts.copy()
    branch = repo.create_branch('testbranch')
    for cIdx in range(nDevCommits):
        co = repo.checkout(write=True, branch=branch.name)
        cmt, samples = _commit_random_samples(
            co, nDevSamples, f'dev commit number: {cIdx}')
        devCmts[cmt] = samples

    push2 = repo.remote.push('origin', branch.name)
    assert push2 == branch.name
    branchHist = list_history(repo._env.refenv,
                              repo._env.branchenv,
                              branch_name=branch.name)

    # -------------------------- end setup ------------------------------------

    # Clone test (master branch)
    new_tmpdir = pjoin(managed_tmpdir, 'new')
    mkdir(new_tmpdir)
    newRepo = Repository(path=new_tmpdir, exists=False)
    try:
        newRepo.clone('Test User',
                      '*****@*****.**',
                      server_instance,
                      remove_old=True)
        newRepo.remote.fetch('origin', branch=branch.name)
        newRepo.create_branch('testbranch', base_commit=branchHist['head'])
        assert newRepo.list_branches() == [
            'master', 'origin/master', f'origin/{branch.name}', branch.name
        ]

        # ---------------- format arguments depending on options ----------------

        kwargs = {
            'arrayset_names': fetchAsetns,
            'max_num_bytes': fetchNbytes,
            'retrieve_all_history': fetchAll_history,
        }
        if fetchBranch is not None:
            hist = branchHist if fetchBranch == 'testbranch' else masterHist
            kwargs['branch'] = fetchBranch
            kwargs['commit'] = None
        else:
            # The original compared ``fetchBranch`` (None here) to 'br', which
            # can never match, so the master history was always selected.
            # NOTE(review): ``fetchCommit`` is never read; possibly it was
            # meant to select the history here - confirm.
            hist = masterHist
            kwargs['branch'] = None
            kwargs['commit'] = hist['head']

        if fetchAll_history is True:
            commits_to_check = hist['order']
        else:
            commits_to_check = [hist['head']]

        # --------------------- retrieve data with desired options --------------

        # This case should fail
        if (fetchAll_history is True) and isinstance(fetchNbytes, int):
            with pytest.raises(ValueError):
                newRepo.remote.fetch_data(remote='origin', **kwargs)
            # plain return: a test function should not return a value
            return

        # get data
        fetch_commits = newRepo.remote.fetch_data(remote='origin', **kwargs)
        assert commits_to_check == fetch_commits

        # ----------- check that you got everything you expected ----------------

        for fCmt in fetch_commits:
            co = newRepo.checkout(commit=fCmt)
            try:
                assert co.commit_hash == fCmt
                ds1List, ds2List = devCmts[fCmt]

                if isinstance(fetchAsetns, tuple):
                    # when we are checking one aset only
                    selected = fetchAsetns[0]
                    expected = ds1List if selected == 'writtenaset' else ds2List
                    checks = [(co.arraysets[selected], expected)]
                else:
                    # compare both asets at the same time
                    checks = [
                        (co.arraysets['writtenaset'], ds1List),
                        (co.arraysets['_two'], ds2List),
                    ]

                totalSeen = 0
                expected_lists = [expected for _, expected in checks]
                # zip stops at the shortest expected list, as before
                for idx, expected_row in enumerate(zip(*expected_lists)):
                    key = str(idx)
                    for (column, _), expected in zip(checks, expected_row):
                        if fetchNbytes is None:
                            # no size limit: every sample must be readable
                            assert np.allclose(expected, column[key])
                        else:
                            totalSeen += _check_and_measure(column, key, expected)
                    if fetchNbytes is not None:
                        assert totalSeen <= fetchNbytes
            finally:
                co.close()
    finally:
        # always close the clone's environments, also on assertion failure
        newRepo._env._close_environments()