Example No. 1
def diff(repo: Repository, dev, master):
    """Display diff of DEV commit/branch to MASTER commit/branch.

    If no MASTER is specified, then the staging area branch HEAD will
    be used as the commit digest for MASTER. This operation will
    return a diff which could be interpreted as if you were merging
    the changes in DEV into MASTER.

    TODO: VERIFY ORDER OF OUTPUT IS CORRECT.
    """
    from hangar.records.commiting import expand_short_commit_digest
    from hangar.records.commiting import get_staging_branch_head
    from hangar.records.summarize import status

    if dev not in repo.list_branches():
        dev = expand_short_commit_digest(repo._env.refenv, dev)

    if master is None:
        master = get_staging_branch_head(repo._env.branchenv)
    elif master not in repo.list_branches():
        master = expand_short_commit_digest(repo._env.refenv, master)

    diff_spec = repo.diff(master, dev)
    buf = status(hashenv=repo._env.hashenv, branch_name=dev, diff=diff_spec.diff)
    click.echo(buf.getvalue())
Example No. 2
def test_branch_create_and_delete(written_two_cmt_server_repo):
    server, base_repo = written_two_cmt_server_repo

    co = base_repo.checkout(write=True)
    cmt = co.commit_hash
    co.close()

    runner = CliRunner()
    with runner.isolated_filesystem():
        P = getcwd()
        new_repo = Repository(P, exists=False)
        try:
            res = runner.invoke(
                cli.clone,
                ['--name', 'Foo Tester', '--email', '*****@*****.**', f'{server}'],
                obj=new_repo)
            assert res.exit_code == 0

            res = runner.invoke(cli.branch_create, ['testbranch'],
                                obj=new_repo)
            assert res.exit_code == 0
            assert res.stdout == f"Created BRANCH: testbranch HEAD: {cmt}\n"

            branches = new_repo.list_branches()
            assert branches == ['master', 'origin/master', 'testbranch']

            res = runner.invoke(cli.branch_remove, ['testbranch'],
                                obj=new_repo)
            assert res.exit_code == 0
            assert res.stdout == f"Deleted BRANCH: testbranch HEAD: {cmt}\n"

            branches = new_repo.list_branches()
            assert branches == ['master', 'origin/master']

            new_repo.create_branch('secondtest')
            co = new_repo.checkout(write=True, branch='secondtest')
            co.add_str_column('test_meta')
            newDigest = co.commit('dummy commit')
            co.close()

            # re-open with staging set to master so we can try to delete secondtest
            co = new_repo.checkout(write=True, branch='master')
            co.close()

            res = runner.invoke(cli.branch_remove, ['secondtest'],
                                obj=new_repo)
            assert res.exit_code == 1

            res = runner.invoke(cli.branch_remove, ['secondtest', '-f'],
                                obj=new_repo)
            assert res.exit_code == 0
            assert res.stdout == f"Deleted BRANCH: secondtest HEAD: {newDigest}\n"

            res = runner.invoke(cli.branch_list, obj=new_repo)
            assert res.exit_code == 0
            assert res.stdout == "['master', 'origin/master']\n"
        finally:
            new_repo._env._close_environments()
Example No. 3
def test_cannot_operate_without_repo_init(managed_tmpdir):
    repo = Repository(path=managed_tmpdir, exists=False)

    with pytest.raises(RuntimeError):
        repo.writer_lock_held()
    with pytest.raises(RuntimeError):
        repo.checkout()
    with pytest.raises(RuntimeError):
        repo.writer_lock_held()
    with pytest.raises(RuntimeError):
        repo.log()
    with pytest.raises(RuntimeError):
        repo.summary()
    with pytest.raises(RuntimeError):
        repo.merge('fail', 'master', 'nonexistant')
    with pytest.raises(RuntimeError):
        repo.create_branch('test')
    with pytest.raises(RuntimeError):
        repo.list_branches()
    with pytest.raises(RuntimeError):
        repo.force_release_writer_lock()

    with pytest.raises(RuntimeError):
        repo.remote.add('origin', 'foo')
    with pytest.raises(RuntimeError):
        repo.remote.remove('origin')
    with pytest.raises(RuntimeError):
        repo.remote.fetch('origin', 'master')
    with pytest.raises(RuntimeError):
        repo.remote.fetch_data('origin', branch='master')
    with pytest.raises(RuntimeError):
        repo.remote.list_all()
    with pytest.raises(RuntimeError):
        repo.remote.ping('origin')
    with pytest.raises(RuntimeError):
        repo.remote.push('origin', 'master')
    with pytest.raises(RuntimeError):
        repo.remove_branch('master')

    with pytest.raises(RuntimeError):
        repo.path
    with pytest.raises(RuntimeError):
        repo.version
    with pytest.raises(RuntimeError):
        repo.writer_lock_held
    with pytest.raises(RuntimeError):
        repo.size_human
    with pytest.raises(RuntimeError):
        repo.size_nbytes

    assert repo._env.repo_is_initialized is False
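For contrast with the test above, a minimal sketch of the normal startup path: once ``init()`` has been called, the same methods stop raising ``RuntimeError``. The directory and user details below are placeholders, not values taken from the examples.

import os
from hangar import Repository

# Placeholder location; any empty, writable directory will do.
path = '/tmp/hangar_demo'
os.makedirs(path, exist_ok=True)

repo = Repository(path=path, exists=False)
repo.init(user_name='tester', user_email='tester@example.com', remove_old=True)

# After init the default branch exists and the calls above no longer raise.
assert repo.list_branches() == ['master']

repo._env._close_environments()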
Example No. 4
def branch_create(ctx, name, startpoint):
    """Create a branch with NAME at STARTPOINT (short-digest or branch)

    If no STARTPOINT is provided, the new branch is positioned at the HEAD of
    the staging area branch, automatically.
    """
    from hangar.records.heads import get_branch_head_commit, get_staging_branch_head

    P = os.getcwd()
    repo = Repository(path=P)
    branch_names = repo.list_branches()
    if name in branch_names:
        raise ValueError(f'branch name: {name} already exists')

    if startpoint is None:
        branch = get_staging_branch_head(repo._env.branchenv)
        base_commit = get_branch_head_commit(repo._env.branchenv, branch)
    elif startpoint in branch_names:
        base_commit = get_branch_head_commit(repo._env.branchenv, startpoint)
    else:
        base_commit = expand_short_commit_digest(repo._env.refenv, startpoint)

    branch_name = repo.create_branch(name, base_commit=base_commit)
    click.echo(f'BRANCH: {branch_name} HEAD: {base_commit}')
Example No. 5
def test_branch_create_and_list(written_two_cmt_server_repo):
    server, base_repo = written_two_cmt_server_repo

    co = base_repo.checkout(write=True)
    cmt = co.commit_hash
    co.close()

    runner = CliRunner()
    with runner.isolated_filesystem():
        P = getcwd()
        new_repo = Repository(P, exists=False)
        res = runner.invoke(
            cli.clone,
            ['--name', 'Foo Tester', '--email', '*****@*****.**', f'{server}'],
            obj=new_repo)
        assert res.exit_code == 0

        res = runner.invoke(cli.branch_create, ['testbranch'], obj=new_repo)
        assert res.exit_code == 0
        assert res.stdout == f"Created BRANCH: testbranch HEAD: {cmt}\n"

        branches = new_repo.list_branches()
        assert branches == ['master', 'origin/master', 'testbranch']

        res = runner.invoke(cli.branch_list, obj=new_repo)
        assert res.exit_code == 0
        assert res.stdout == "['master', 'origin/master', 'testbranch']\n"
Example No. 6
def view_data(ctx, repo: Repository, column, sample, startpoint, format_, plugin):
    """Use a plugin to view the data of some SAMPLE in COLUMN at STARTPOINT.
    """
    from hangar.records.commiting import expand_short_commit_digest
    from hangar.records.heads import get_branch_head_commit, get_staging_branch_head
    from hangar import external

    kwargs = parse_custom_arguments(ctx.args)
    if startpoint in repo.list_branches():
        base_commit = get_branch_head_commit(repo._env.branchenv, startpoint)
    elif startpoint:
        base_commit = expand_short_commit_digest(repo._env.refenv, startpoint)
    else:
        branch_name = get_staging_branch_head(repo._env.branchenv)
        base_commit = get_branch_head_commit(repo._env.branchenv, branch_name)

    co = repo.checkout(commit=base_commit)
    try:
        aset = co.columns.get(column)
        extension = format_.lstrip('.') if format_ else None
        data = aset[sample]
        try:
            external.show(data, plugin=plugin, extension=extension, **kwargs)
        except Exception as e:
            raise click.ClickException(e)
    except KeyError as e:
        raise click.ClickException(e)
    finally:
        co.close()
Example No. 7
def branch_create(repo: Repository, name, startpoint):
    """Create a branch with NAME at STARTPOINT (short-digest or branch)

    If no STARTPOINT is provided, the new branch is positioned at the HEAD of
    the staging area branch, automatically.
    """
    from hangar.records.commiting import expand_short_commit_digest
    from hangar.records.heads import get_branch_head_commit
    from hangar.records.heads import get_staging_branch_head

    branch_names = repo.list_branches()
    if name in branch_names:
        e = ValueError(f'branch name: {name} already exists')
        raise click.ClickException(e)

    try:
        if startpoint is None:
            branch = get_staging_branch_head(repo._env.branchenv)
            base_commit = get_branch_head_commit(repo._env.branchenv, branch)
        elif startpoint in branch_names:
            base_commit = get_branch_head_commit(repo._env.branchenv, startpoint)
        else:
            base_commit = expand_short_commit_digest(repo._env.refenv, startpoint)

        res = repo.create_branch(name, base_commit=base_commit)
    except (KeyError, ValueError, RuntimeError) as e:
        raise click.ClickException(e)

    click.echo(f'Created BRANCH: {res.name} HEAD: {res.digest}')
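Several of the commands above repeat the same startpoint-resolution logic: a branch name resolves to that branch's HEAD, anything else is treated as a (possibly shortened) commit digest, and no startpoint at all falls back to the staging area branch HEAD. A stand-alone sketch of that pattern follows; the helper name resolve_startpoint is ours and not part of hangar's API.

from hangar.records.commiting import expand_short_commit_digest
from hangar.records.heads import get_branch_head_commit, get_staging_branch_head


def resolve_startpoint(repo, startpoint=None):
    """Illustrative helper only: return the commit digest a startpoint refers to."""
    if startpoint is None:
        # no startpoint given: use the HEAD of the staging area branch
        branch = get_staging_branch_head(repo._env.branchenv)
        return get_branch_head_commit(repo._env.branchenv, branch)
    if startpoint in repo.list_branches():
        # startpoint names a branch: use that branch's HEAD commit
        return get_branch_head_commit(repo._env.branchenv, startpoint)
    # otherwise expand it as a short commit digest
    return expand_short_commit_digest(repo._env.refenv, startpoint)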
Example No. 8
def fetch_data(repo: Repository, remote, startpoint, column, nbytes, all_):
    """Get data from REMOTE referenced by STARTPOINT (short-commit or branch).

    The default behavior is to only download a single commit's data or the HEAD
    commit of a branch. Please review optional arguments for other behaviors.
    """
    from hangar.records.commiting import expand_short_commit_digest
    from hangar.records.heads import get_branch_head_commit
    from hangar.records.heads import get_staging_branch_head
    from hangar.utils import parse_bytes

    if startpoint is None:
        branch = get_staging_branch_head(repo._env.branchenv)
        commit = get_branch_head_commit(repo._env.branchenv, branch)
    elif startpoint in repo.list_branches():
        commit = get_branch_head_commit(repo._env.branchenv, startpoint)
    else:
        commit = expand_short_commit_digest(repo._env.refenv, startpoint)
    click.echo(f'Fetching data for commit: {commit}')

    try:
        max_nbytes = parse_bytes(nbytes)
    except AttributeError:
        max_nbytes = None
    if len(column) == 0:
        column = None

    commits = repo.remote.fetch_data(remote=remote,
                                     commit=commit,
                                     column_names=column,
                                     max_num_bytes=max_nbytes,
                                     retrieve_all_history=all_)
    click.echo(f'completed data for commits: {commits}')
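The same fetch can be driven through the API directly, as the clone tests further down do. A minimal sketch; the remote name, server address, and the assumption of an already-initialized repository are ours.

from hangar import Repository

# Assumes the current directory holds an initialized repository and a hangar
# server is reachable at the placeholder address below.
repo = Repository('.', exists=True)
repo.remote.add('origin', 'localhost:50051')
repo.remote.fetch('origin', branch='master')
fetched = repo.remote.fetch_data(remote='origin', branch='master',
                                 max_num_bytes=None, retrieve_all_history=False)
print(fetched)  # list of commit digests whose data was retrieved
repo._env._close_environments()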
Example No. 9
def test_push_and_clone_master_linear_history_multiple_commits(
        server_instance, repo, managed_tmpdir, array5by7, nCommits, nSamples):
    from hangar import Repository
    from hangar.records.summarize import list_history

    cmtList = []
    co = repo.checkout(write=True)
    co.add_ndarray_column(name='writtenaset', shape=(5, 7), dtype=np.float32)
    for cIdx in range(nCommits):
        if cIdx != 0:
            co = repo.checkout(write=True)
        sampList = []
        with co.columns['writtenaset'] as d:
            for prevKey in list(d.keys())[1:]:
                del d[prevKey]
            for sIdx in range(nSamples):
                arr = np.random.randn(*array5by7.shape).astype(
                    np.float32) * 100
                d[str(sIdx)] = arr
                sampList.append(arr)
        cmt = co.commit(f'commit number: {cIdx}')
        cmtList.append((cmt, sampList))
        co.close()
    masterHist = list_history(repo._env.refenv,
                              repo._env.branchenv,
                              branch_name='master')

    repo.remote.add('origin', server_instance)
    push1 = repo.remote.push('origin', 'master')
    assert push1 == 'master'

    new_tmpdir = pjoin(managed_tmpdir, 'new')
    mkdir(new_tmpdir)
    newRepo = Repository(path=new_tmpdir, exists=False)
    newRepo.clone('Test User',
                  '*****@*****.**',
                  server_instance,
                  remove_old=True)
    assert newRepo.list_branches() == ['master', 'origin/master']
    for cmt, sampList in cmtList:
        with pytest.warns(UserWarning):
            nco = newRepo.checkout(commit=cmt)
        assert len(nco.columns) == 1
        assert 'writtenaset' in nco.columns
        assert len(nco.columns['writtenaset']) == len(sampList)

        assert nco.columns['writtenaset'].contains_remote_references is True
        remoteKeys = nco.columns['writtenaset'].remote_reference_keys
        assert tuple([str(idx) for idx in range(len(sampList))]) == remoteKeys
        for idx, _ in enumerate(sampList):
            sIdx = str(idx)
            assert sIdx in nco.columns['writtenaset']
            with pytest.raises(FileNotFoundError):
                shouldNotExist = nco.columns['writtenaset'][sIdx]
        nco.close()
    cloneMasterHist = list_history(newRepo._env.refenv,
                                   newRepo._env.branchenv,
                                   branch_name='master')
    assert cloneMasterHist == masterHist
    newRepo._env._close_environments()
Example No. 10
def test_push_clone_digests_exceeding_server_nbyte_limit(
        mocker, server_instance_nbytes_limit, repo, managed_tmpdir):
    from hangar import Repository
    from hangar.remote import chunks, client

    # Push master branch test
    masterCmtList = []
    co = repo.checkout(write=True)
    co.add_ndarray_column(name='aset', shape=(50, 50), dtype=np.float32)
    for cIdx in range(4):
        if cIdx != 0:
            co = repo.checkout(write=True)
        masterSampList = []
        with co.columns['aset'] as d:
            for prevKey in list(d.keys())[1:]:
                del d[prevKey]
            for sIdx in range(70):
                arr = np.random.randint(0, 255,
                                        size=(50, 50)).astype(np.float32)
                d[str(sIdx)] = arr
                masterSampList.append(arr)
        cmt = co.commit(f'master commit number: {cIdx}')
        masterCmtList.append((cmt, masterSampList))
        co.close()

    repo.remote.add('origin', server_instance_nbytes_limit)

    spy = mocker.spy(chunks, 'tensorChunkedIterator')
    push1 = repo.remote.push('origin', 'master')
    assert chunks.tensorChunkedIterator.call_count == 6
    for call in spy.call_args_list:
        # maximum amount over 100_000 observed in test development
        assert call[1]['uncomp_nbytes'] <= 550_000

    assert push1 == 'master'

    # Clone test (master branch)
    new_tmpdir = pjoin(managed_tmpdir, 'new')
    mkdir(new_tmpdir)
    newRepo = Repository(path=new_tmpdir, exists=False)
    newRepo.clone('Test User',
                  '*****@*****.**',
                  server_instance_nbytes_limit,
                  remove_old=True)
    assert newRepo.list_branches() == ['master', 'origin/master']

    spy = mocker.spy(client.HangarClient, 'fetch_data')
    for cmt, sampList in masterCmtList:
        newRepo.remote.fetch_data('origin', commit=cmt)
        nco = newRepo.checkout(commit=cmt)
        assert len(nco.columns) == 1
        assert 'aset' in nco.columns
        assert len(nco.columns['aset']) == 70
        for sIdx, samp in enumerate(sampList):
            assert np.allclose(nco.columns['aset'][str(sIdx)], samp)
        nco.close()
        del nco
    assert client.HangarClient.fetch_data.call_count == 8
    newRepo._env._close_environments()
Example No. 11
def branch_list(ctx):
    """list all branch names

    Includes both remote branches as well as local branches.
    """
    P = os.getcwd()
    repo = Repository(path=P)
    click.echo(repo.list_branches())
Example No. 12
def test_starting_up_repo_warns_should_exist_manual_args(managed_tmpdir):
    with pytest.warns(UserWarning):
        repo = Repository(path=managed_tmpdir, exists=True)
    repo.init(user_name='tester', user_email='*****@*****.**', remove_old=True)
    assert repo.list_branches() == ['master']
    assert os.path.isdir(repo._repo_path)
    assert repo._repo_path == os.path.join(managed_tmpdir, '.hangar')
    co = repo.checkout(write=True)
    assert co.diff.status() == 'CLEAN'
    co.close()
    repo._env._close_environments()
Example No. 13
def export_data(ctx, repo: Repository, column, outdir, startpoint, sample,
                format_, plugin):
    """Export COLUMN sample data as it existed a STARTPOINT to some format and path.

    A single sample can be selected for export with the ``--sample`` switch
    (without it, all samples in the given column will be exported). Since
    hangar supports both int and str datatypes for the sample name, it may at
    times be necessary to specify the type along with the name. This is done
    by separating the type and name with a colon.

    Example:

       1. if the sample name is the string ``10`` - ``str:10`` or ``10``

       2. if the sample name is ``sample1`` - ``str:sample1`` or ``sample1``

       3. if the sample name is an int, say 10 - ``int:10``
    """
    from hangar.records.commiting import expand_short_commit_digest
    from hangar.records.heads import get_branch_head_commit, get_staging_branch_head
    from hangar import external
    kwargs = parse_custom_arguments(ctx.args)

    if startpoint in repo.list_branches():
        base_commit = get_branch_head_commit(repo._env.branchenv, startpoint)
    elif startpoint:
        base_commit = expand_short_commit_digest(repo._env.refenv, startpoint)
    else:
        branch_name = get_staging_branch_head(repo._env.branchenv)
        base_commit = get_branch_head_commit(repo._env.branchenv, branch_name)

    co = repo.checkout(commit=base_commit)
    try:
        aset = co.columns.get(column)
        sampleNames = [sample] if sample is not None else list(aset.keys())
        extension = format_.lstrip('.') if format_ else None
        with aset, click.progressbar(sampleNames) as sNamesBar:
            for sampleN in sNamesBar:
                data = aset[sampleN]
                formated_sampleN = f'{type(sampleN).__name__}:{sampleN}'
                try:
                    external.save(data, outdir, formated_sampleN, extension,
                                  plugin, **kwargs)
                except Exception as e:
                    raise click.ClickException(e)
    except KeyError as e:
        raise click.ClickException(e)
    finally:
        co.close()
Example No. 14
def test_push_clone_digests_exceeding_server_nbyte_limit(
        server_instance, repo, managed_tmpdir):
    from hangar.remote import config
    from hangar import Repository

    config.config['server']['grpc']['fetch_max_nbytes'] = 100_000
    config.config['client']['grpc']['push_max_nbytes'] = 100_000

    # Push master branch test
    masterCmtList = []
    co = repo.checkout(write=True)
    co.arraysets.init_arrayset(name='aset', shape=(50, 20), dtype=np.float32)
    for cIdx in range(4):
        if cIdx != 0:
            co = repo.checkout(write=True)
        masterSampList = []
        with co.arraysets['aset'] as d:
            for prevKey in list(d.keys())[1:]:
                d.remove(prevKey)
            for sIdx in range(70):
                arr = np.random.randn(50, 20).astype(np.float32)
                d[str(sIdx)] = arr
                masterSampList.append(arr)
        cmt = co.commit(f'master commit number: {cIdx}')
        masterCmtList.append((cmt, masterSampList))
        co.close()

    repo.remote.add('origin', server_instance)
    push1 = repo.remote.push('origin', 'master')
    assert push1 == 'master'

    # Clone test (master branch)
    new_tmpdir = pjoin(managed_tmpdir, 'new')
    mkdir(new_tmpdir)
    newRepo = Repository(path=new_tmpdir, exists=False)
    newRepo.clone('Test User',
                  '*****@*****.**',
                  server_instance,
                  remove_old=True)
    assert newRepo.list_branches() == ['master', 'origin/master']
    for cmt, sampList in masterCmtList:
        newRepo.remote.fetch_data('origin', commit=cmt)
        nco = newRepo.checkout(commit=cmt)
        assert len(nco.arraysets) == 1
        assert 'aset' in nco.arraysets
        assert len(nco.arraysets['aset']) == 70
        for sIdx, samp in enumerate(sampList):
            assert np.allclose(nco.arraysets['aset'][str(sIdx)], samp)
        nco.close()
    newRepo._env._close_environments()
Example No. 15
def test_push_restricted_with_right_username_password(
        server_instance_push_restricted, repo, managed_tmpdir):
    from hangar import Repository

    # Push master branch test
    masterCmtList = []
    co = repo.checkout(write=True)
    co.add_ndarray_column(name='aset', shape=(50, 20), dtype=np.float32)
    for cIdx in range(1):
        if cIdx != 0:
            co = repo.checkout(write=True)
        masterSampList = []
        with co.columns['aset'] as d:
            for prevKey in list(d.keys())[1:]:
                del d[prevKey]
            for sIdx in range(70):
                arr = np.random.randn(50, 20).astype(np.float32)
                d[str(sIdx)] = arr
                masterSampList.append(arr)
        cmt = co.commit(f'master commit number: {cIdx}')
        masterCmtList.append((cmt, masterSampList))
        co.close()

    repo.remote.add('origin', server_instance_push_restricted)
    push1 = repo.remote.push('origin',
                             'master',
                             username='******',
                             password='******')
    assert push1 == 'master'

    # Clone test (master branch)
    new_tmpdir = pjoin(managed_tmpdir, 'new')
    mkdir(new_tmpdir)
    newRepo = Repository(path=new_tmpdir, exists=False)
    newRepo.clone('Test User',
                  '*****@*****.**',
                  server_instance_push_restricted,
                  remove_old=True)
    assert newRepo.list_branches() == ['master', 'origin/master']
    for cmt, sampList in masterCmtList:
        newRepo.remote.fetch_data('origin', commit=cmt)
        nco = newRepo.checkout(commit=cmt)
        assert len(nco.columns) == 1
        assert 'aset' in nco.columns
        assert len(nco.columns['aset']) == 70
        for sIdx, samp in enumerate(sampList):
            assert np.allclose(nco.columns['aset'][str(sIdx)], samp)
        nco.close()
    newRepo._env._close_environments()
Example No. 16
def log(repo: Repository, startpoint):
    """Display commit graph starting at STARTPOINT (short-digest or name)

    If no argument is passed in, the staging area branch HEAD will be used as the
    starting point.
    """
    from hangar.records.commiting import expand_short_commit_digest

    if startpoint is None:
        click.echo(repo.log())
    elif startpoint in repo.list_branches():
        click.echo(repo.log(branch=startpoint))
    else:
        base_commit = expand_short_commit_digest(repo._env.refenv, startpoint)
        click.echo(repo.log(commit=base_commit))
Example No. 17
def log(ctx, startpoint):
    """Display commit graph starting at STARTPOINT (short-digest or name)

    If no argument is passed in, the staging area branch HEAD will be used as the
    starting point.
    """
    P = os.getcwd()
    repo = Repository(path=P)
    if startpoint is None:
        click.echo(repo.log())
    elif startpoint in repo.list_branches():
        click.echo(repo.log(branch=startpoint))
    else:
        base_commit = expand_short_commit_digest(repo._env.refenv, startpoint)
        click.echo(repo.log(commit=base_commit))
Example No. 18
def import_data(ctx, repo: Repository, column, path, branch, plugin,
                overwrite):
    """Import file or directory of files at PATH to COLUMN in the staging area.

    If passing in a directory, all files in the directory will be imported; if
    passing in a file, just that file will be imported.
    """
    # TODO: ignore warning through env variable
    from types import GeneratorType
    from hangar import external
    from hangar.records.heads import get_staging_branch_head

    kwargs = parse_custom_arguments(ctx.args)
    if branch is None:
        branch = get_staging_branch_head(repo._env.branchenv)
    elif branch not in repo.list_branches():
        raise click.ClickException(
            f'Branch name: {branch} does not exist, Exiting.')
    click.echo(f'Writing to branch: {branch}')

    co = repo.checkout(write=True, branch=branch)
    try:
        active_aset = co.columns.get(column)
        p = Path(path)
        files = [f.resolve()
                 for f in p.iterdir()] if p.is_dir() else [p.resolve()]
        with active_aset as aset, click.progressbar(files) as filesBar:
            for f in filesBar:
                # handle multi-suffix files (e.g. tar.bz2)
                ext = ''.join(f.suffixes).strip('.')
                loaded = external.load(f,
                                       plugin=plugin,
                                       extension=ext,
                                       **kwargs)
                if not isinstance(loaded, GeneratorType):
                    loaded = [loaded]
                for arr, fname in loaded:
                    if (not overwrite) and (fname in aset):
                        continue
                    try:
                        aset[fname] = arr
                    except ValueError as e:
                        click.echo(e)
    except (ValueError, KeyError) as e:
        raise click.ClickException(e)
    finally:
        co.close()
Example No. 19
def summary(repo: Repository, startpoint):
    """Display content summary at STARTPOINT (short-digest or branch).

    If no argument is passed in, the staging area branch HEAD will be used as the
    starting point. In order to receive a machine readable, and more complete
    version of this information, please see the ``Repository.summary()`` method
    of the API.
    """
    from hangar.records.commiting import expand_short_commit_digest

    if startpoint is None:
        click.echo(repo.summary())
    elif startpoint in repo.list_branches():
        click.echo(repo.summary(branch=startpoint))
    else:
        base_commit = expand_short_commit_digest(repo._env.refenv, startpoint)
        click.echo(repo.summary(commit=base_commit))
Example No. 20
def import_data(repo: Repository, arrayset, path, branch, plugin, overwrite):
    """Import file(s) at PATH to ARRAYSET in the staging area.
    """
    from hangar.cli.io import imread
    from hangar.records.heads import get_staging_branch_head

    if branch is not None:
        if branch in repo.list_branches():
            branch_name = branch
        else:
            click.echo(f'Branch name: {branch} does not exist, Exiting.')
            return None
    else:
        branch_name = get_staging_branch_head(repo._env.branchenv)
    click.echo(f'Writing to branch: {branch_name}')

    # open the checkout before entering the try block so that an early return
    # or a checkout failure cannot reach the finally clause with `co` undefined
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", UserWarning)
        co = repo.checkout(write=True, branch=branch_name)

    try:
        aset = co.arraysets.get(arrayset)

        if os.path.isfile(path):
            fname = os.path.basename(path)
            if not overwrite:
                if fname in aset:
                    click.echo(f'skipping existing name: {fname} as overwrite flag not set')
                    return None
            fNamePth = [(fname, path)]
        else:
            fnames = os.listdir(path)
            if not overwrite:
                fnames = [fname for fname in fnames if fname not in aset]
            fNamePth = [(fname, os.path.join(path, fname)) for fname in fnames]

        with aset as a, click.progressbar(fNamePth) as fnamesBar:
            for fn, fpth in fnamesBar:
                arr = imread(fpth, plugin=plugin)
                try:
                    a[fn] = arr
                except ValueError as e:
                    click.echo(e)
    finally:
        co.close()
Example No. 21
def fetch_data(ctx, remote, startpoint, aset, nbytes, all_):
    """Get data from REMOTE referenced by STARTPOINT (short-commit or branch).

    The default behavior is to only download a single commit's data or the HEAD
    commit of a branch. Please review optional arguments for other behaviors.
    """
    from hangar.records.heads import get_branch_head_commit, get_staging_branch_head
    from hangar.utils import parse_bytes

    P = os.getcwd()
    repo = Repository(path=P)
    if startpoint is None:
        branch = get_staging_branch_head(repo._env.branchenv)
        commit = get_branch_head_commit(repo._env.branchenv, branch)
        click.echo(
            f'No startpoint supplied, fetching data of HEAD: {commit} for BRANCH: {branch}'
        )
    elif startpoint in repo.list_branches():
        commit = get_branch_head_commit(repo._env.branchenv, startpoint)
        click.echo(
            f'Fetching data for HEAD: {commit} of STARTPOINT BRANCH: {startpoint}'
        )
    else:
        commit = expand_short_commit_digest(repo._env.refenv, startpoint)
        click.echo(f'Fetching data for STARTPOINT HEAD: {commit}')

    click.echo(f'aset argument: {aset}')
    try:
        max_nbytes = parse_bytes(nbytes)
        click.echo(f'nbytes argument: {max_nbytes}')
    except AttributeError:
        max_nbytes = None

    if len(aset) == 0:
        aset = None

    commits = repo.remote.fetch_data(remote=remote,
                                     commit=commit,
                                     arrayset_names=aset,
                                     max_num_bytes=max_nbytes,
                                     retrieve_all_history=all_)
    click.echo(f'completed data for commits: {commits}')
Example No. 22
def export_data(repo: Repository, startpoint, arrayset, out, sample, format_,
                plugin):
    """export ARRAYSET sample data as it existed a STARTPOINT to some format and path.
    """
    from hangar.records.commiting import expand_short_commit_digest
    from hangar.records.heads import get_branch_head_commit
    from hangar.cli.io import imsave

    if startpoint in repo.list_branches():
        base_commit = get_branch_head_commit(repo._env.branchenv, startpoint)
    else:
        base_commit = expand_short_commit_digest(repo._env.refenv, startpoint)

    # open the checkout before the try block so a checkout failure cannot
    # reach the finally clause with `co` undefined
    co = repo.checkout(write=False, commit=base_commit)
    try:
        arrayset = co.arraysets[arrayset]
        if sample:
            sampleNames = [sample]
        else:
            sampleNames = list(arrayset.keys())

        if format_:
            format_ = format_.lstrip('.')
        outP = os.path.expanduser(os.path.normpath(out))

        with arrayset as aset, click.progressbar(sampleNames) as sNamesBar:
            for sampleN in sNamesBar:
                if format_:
                    if sampleN.endswith(format_):
                        outFP = os.path.join(outP, f'{sampleN}')
                    else:
                        outFP = os.path.join(outP, f'{sampleN}.{format_}')
                else:
                    outFP = os.path.join(outP, f'{sampleN}')
                try:
                    data = aset[sampleN]
                    imsave(outFP, data)
                except KeyError as e:
                    click.echo(e)
    finally:
        co.close()
Example No. 23
def view_data(repo: Repository, startpoint, arrayset, sample, plugin):
    """Use a plugin to view the data of some SAMPLE in ARRAYSET at STARTPOINT.
    """
    from hangar.records.commiting import expand_short_commit_digest
    from hangar.records.heads import get_branch_head_commit
    from hangar.cli.io import imshow, show

    if startpoint in repo.list_branches():
        base_commit = get_branch_head_commit(repo._env.branchenv, startpoint)
    else:
        base_commit = expand_short_commit_digest(repo._env.refenv, startpoint)

    # open the checkout before the try block so a checkout failure cannot
    # reach the finally clause with `co` undefined
    co = repo.checkout(write=False, commit=base_commit)
    try:
        arrayset = co.arraysets[arrayset]
        try:
            data = arrayset[sample]
            imshow(data, plugin=plugin)
            show()
        except KeyError as e:
            click.echo(e)
    finally:
        co.close()
Example No. 24
class Hinterface(object):
    """
    Interface class for interacting with hangar repositories. It lets the
    APIs built on top of it ignore hangar's internals and work through
    these high-level functions instead.
    """
    def __init__(self,
                 path,
                 branch='master',
                 arrayset_name=None,
                 sample_name=None):
        if not path.exists():
            raise FileNotFoundError("Repository does not exist")
        self.repo = Repository(path)
        # TODO: fix hangar's version compatibility check
        if not self.repo.initialized:
            raise RuntimeError("Repository not initialized")
        self.branch = branch
        self.arrayset_name = arrayset_name
        self.sample_name = sample_name
        self.rcheckout = self.repo.checkout(branch=self.branch)

    @classmethod
    def create_repo(cls, path, username, email, desc=None, create_path=True):
        # TODO: Remove if it is not necessary
        if not path.exists() and create_path:
            path.mkdir()
        repo = Repository(path)
        repo.init(username, email)

    @property
    def repo_details(self):
        cmt_details = self.repo.log(return_contents=True)
        # TODO: make sure pop returns the latest
        try:
            top = cmt_details['order'].pop()
        except IndexError:
            cmt_time = None
        else:
            cmt_time = cmt_details['specs'][top]['commit_time']
        return {
            "last_commit_time": cmt_time,
            "total_commit_count": len(cmt_details["order"]),
            "branch_count": len(self.repo.list_branches()),
            "hangar_version": self.repo.version
        }

    @property
    def arraysets(self):
        if self.arrayset_name:
            yield self.rcheckout.arraysets[self.arrayset_name]
        else:
            for val in self.rcheckout.arraysets.values():
                yield val

    @property
    def sample_names(self):
        if self.arrayset_name:
            aset = self.rcheckout[self.arrayset_name]
            yield aset.name, list(aset.keys())
        else:
            for aset in self.rcheckout.arraysets.values():
                yield aset.name, list(aset.keys())

    def get_samples(self, plugin_name=None):
        pass
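A possible usage sketch for the class above. The repository path is a placeholder and must already point at an initialized hangar repository; everything else relies only on the attributes and properties defined above.

from pathlib import Path

# Placeholder path to an existing, initialized hangar repository.
iface = Hinterface(Path('/data/my_hangar_repo'), branch='master')

print(iface.repo_details)  # last commit time, commit count, branch count, hangar version
for aset_name, sample_keys in iface.sample_names:
    print(aset_name, len(sample_keys))

iface.rcheckout.close()  # close the read checkout opened in __init__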
Example No. 25
def test_server_push_two_branch_then_clone_fetch_data_options(
        server_instance, repo, managed_tmpdir, array5by7, nMasterCommits,
        nMasterSamples, nDevCommits, nDevSamples, fetchBranch, fetchCommit,
        fetchAsetns, fetchNbytes, fetchAll_history):
    from hangar import Repository
    from hangar.records.summarize import list_history

    # Push master branch test
    masterCmts = {}
    co = repo.checkout(write=True)
    co.arraysets.init_arrayset(name='writtenaset',
                               shape=(5, 7),
                               dtype=np.float32)
    co.arraysets.init_arrayset(name='_two', shape=(20), dtype=np.float32)
    for cIdx in range(nMasterCommits):
        if cIdx != 0:
            co = repo.checkout(write=True)
        masterSampList1 = []
        masterSampList2 = []
        with co.arraysets['writtenaset'] as d, co.arraysets['_two'] as dd:
            for prevKey in list(d.keys())[1:]:
                d.remove(prevKey)
                dd.remove(prevKey)

            for sIdx in range(nMasterSamples):
                arr1 = np.random.randn(*array5by7.shape).astype(
                    np.float32) * 100
                d[str(sIdx)] = arr1
                masterSampList1.append(arr1)
                arr2 = np.random.randn(20).astype(np.float32)
                dd[str(sIdx)] = arr2
                masterSampList2.append(arr2)
        cmt = co.commit(f'master commit number: {cIdx}')
        masterCmts[cmt] = (masterSampList1, masterSampList2)
        co.close()

    repo.remote.add('origin', server_instance)
    push1 = repo.remote.push('origin', 'master')
    assert push1 == 'master'
    masterHist = list_history(repo._env.refenv,
                              repo._env.branchenv,
                              branch_name='master')

    # Push dev branch test
    devCmts = masterCmts.copy()
    branch = repo.create_branch('testbranch')
    for cIdx in range(nDevCommits):
        co = repo.checkout(write=True, branch=branch.name)
        devSampList1 = []
        devSampList2 = []
        with co.arraysets['writtenaset'] as d, co.arraysets['_two'] as dd:
            for prevKey in list(d.keys())[1:]:
                d.remove(prevKey)
                dd.remove(prevKey)

            for sIdx in range(nDevSamples):
                arr1 = np.random.randn(*array5by7.shape).astype(
                    np.float32) * 100
                d[str(sIdx)] = arr1
                devSampList1.append(arr1)
                arr2 = np.random.randn(20).astype(np.float32)
                dd[str(sIdx)] = arr2
                devSampList2.append(arr2)
        cmt = co.commit(f'dev commit number: {cIdx}')
        devCmts[cmt] = (devSampList1, devSampList2)
        co.close()

    push2 = repo.remote.push('origin', branch.name)
    assert push2 == branch.name
    branchHist = list_history(repo._env.refenv,
                              repo._env.branchenv,
                              branch_name=branch.name)

    # -------------------------- end setup ------------------------------------

    # Clone test (master branch)
    new_tmpdir = pjoin(managed_tmpdir, 'new')
    mkdir(new_tmpdir)
    newRepo = Repository(path=new_tmpdir, exists=False)
    newRepo.clone('Test User',
                  '*****@*****.**',
                  server_instance,
                  remove_old=True)
    newRepo.remote.fetch('origin', branch=branch.name)
    newRepo.create_branch('testbranch', base_commit=branchHist['head'])
    assert newRepo.list_branches() == [
        'master', 'origin/master', f'origin/{branch.name}', branch.name
    ]

    # ------------------ format arguments depending on options ----------------

    kwargs = {
        'arrayset_names': fetchAsetns,
        'max_num_bytes': fetchNbytes,
        'retrieve_all_history': fetchAll_history,
    }
    if fetchBranch is not None:
        func = branchHist if fetchBranch == 'testbranch' else masterHist
        kwargs['branch'] = fetchBranch
        kwargs['commit'] = None
    else:
        func = branchHist if fetchBranch == 'br' else masterHist
        kwargs['branch'] = None
        kwargs['commit'] = func['head']

    if fetchAll_history is True:
        commits_to_check = func['order']
    else:
        commits_to_check = [func['head']]

    # ----------------------- retrieve data with desired options --------------

    # This case should fail
    if (fetchAll_history is True) and isinstance(fetchNbytes, int):
        try:
            with pytest.raises(ValueError):
                fetch_commits = newRepo.remote.fetch_data(remote='origin',
                                                          **kwargs)
        finally:
            newRepo._env._close_environments()
        return True
    # get data
    fetch_commits = newRepo.remote.fetch_data(remote='origin', **kwargs)
    assert commits_to_check == fetch_commits

    # ------------- check that you got everything you expected ----------------

    for fCmt in fetch_commits:
        co = newRepo.checkout(commit=fCmt)
        assert co.commit_hash == fCmt

        # when we are checking one aset only
        if isinstance(fetchAsetns, tuple):
            d = co.arraysets[fetchAsetns[0]]
            # ensure we didn't fetch the other data simultaneously

            ds1SampList, ds2SampList = devCmts[fCmt]
            if fetchAsetns[0] == 'writtenaset':
                compare = ds1SampList
            else:
                compare = ds2SampList

            totalSeen = 0
            for idx, samp in enumerate(compare):
                if fetchNbytes is None:
                    assert np.allclose(samp, d[str(idx)])
                else:
                    try:
                        arr = d[str(idx)]
                        assert np.allclose(samp, arr)
                        totalSeen += arr.nbytes
                    except FileNotFoundError:
                        pass
                    assert totalSeen <= fetchNbytes

        # compare both asets at the same time
        else:
            d = co.arraysets['writtenaset']
            dd = co.arraysets['_two']
            ds1List, ds2List = devCmts[fCmt]
            totalSeen = 0
            for idx, ds1ds2 in enumerate(zip(ds1List, ds2List)):
                ds1, ds2 = ds1ds2
                if fetchNbytes is None:
                    assert np.allclose(ds1, d[str(idx)])
                    assert np.allclose(ds2, dd[str(idx)])
                else:
                    try:
                        arr1 = d[str(idx)]
                        assert np.allclose(ds1, arr1)
                        totalSeen += arr1.nbytes
                    except FileNotFoundError:
                        pass
                    try:
                        arr2 = dd[str(idx)]
                        assert np.allclose(ds2, arr2)
                        totalSeen += arr2.nbytes
                    except FileNotFoundError:
                        pass
                    assert totalSeen <= fetchNbytes
        co.close()
    newRepo._env._close_environments()
Example No. 26
def test_server_push_second_branch_with_new_commit_then_clone_partial_fetch(
        server_instance, repo, managed_tmpdir, array5by7, nMasterCommits,
        nMasterSamples, nDevCommits, nDevSamples):
    from hangar import Repository
    from hangar.records.summarize import list_history

    # Push master branch test
    masterCmtList = []
    co = repo.checkout(write=True)
    co.add_ndarray_column(name='writtenaset', shape=(5, 7), dtype=np.float32)
    for cIdx in range(nMasterCommits):
        if cIdx != 0:
            co = repo.checkout(write=True)
        masterSampList = []
        with co.columns['writtenaset'] as d:
            for prevKey in list(d.keys())[1:]:
                del d[prevKey]
            for sIdx in range(nMasterSamples):
                arr = np.random.randn(*array5by7.shape).astype(np.float32) * 100
                d[str(sIdx)] = arr
                masterSampList.append(arr)
        cmt = co.commit(f'master commit number: {cIdx}')
        masterCmtList.append((cmt, masterSampList))
        co.close()

    repo.remote.add('origin', server_instance)
    push1 = repo.remote.push('origin', 'master')
    assert push1 == 'master'
    masterHist = list_history(repo._env.refenv, repo._env.branchenv, branch_name='master')

    # Push dev branch test
    devCmtList = []
    branch = repo.create_branch('testbranch')
    for cIdx in range(nDevCommits):
        co = repo.checkout(write=True, branch=branch.name)
        devSampList = []
        with co.columns['writtenaset'] as d:
            for prevKey in list(d.keys())[1:]:
                del d[prevKey]
            for sIdx in range(nDevSamples):
                arr = np.random.randn(*array5by7.shape).astype(np.float32) * 100
                d[str(sIdx)] = arr
                devSampList.append(arr)
        cmt = co.commit(f'dev commit number: {cIdx}')
        devCmtList.append((cmt, devSampList))
        co.close()

    push2 = repo.remote.push('origin', branch.name)
    assert push2 == branch.name
    branchHist = list_history(repo._env.refenv, repo._env.branchenv, branch_name=branch.name)

    # Clone test (master branch)
    new_tmpdir = pjoin(managed_tmpdir, 'new')
    mkdir(new_tmpdir)
    newRepo = Repository(path=new_tmpdir, exists=False)
    newRepo.clone('Test User', '*****@*****.**', server_instance, remove_old=True)
    assert newRepo.list_branches() == ['master', 'origin/master']
    for cmt, sampList in masterCmtList:
        with pytest.warns(UserWarning):
            nco = newRepo.checkout(commit=cmt)
        assert len(nco.columns) == 1
        assert 'writtenaset' in nco.columns
        assert len(nco.columns['writtenaset']) == nMasterSamples

        assert nco.columns['writtenaset'].contains_remote_references is True
        remoteKeys = nco.columns['writtenaset'].remote_reference_keys
        assert tuple([str(idx) for idx in range(len(sampList))]) == remoteKeys
        for idx, _ in enumerate(sampList):
            sIdx = str(idx)
            assert sIdx in nco.columns['writtenaset']
            with pytest.raises(FileNotFoundError):
                shouldNotExist = nco.columns['writtenaset'][sIdx]
        nco.close()
    cloneMasterHist = list_history(newRepo._env.refenv, newRepo._env.branchenv, branch_name='master')
    assert cloneMasterHist == masterHist

    # Fetch test
    fetch = newRepo.remote.fetch('origin', branch=branch.name)
    assert fetch == f'origin/{branch.name}'
    assert newRepo.list_branches() == ['master', 'origin/master', f'origin/{branch.name}']
    for cmt, sampList in devCmtList:

        with pytest.warns(UserWarning):
            nco = newRepo.checkout(commit=cmt)
        assert len(nco.columns) == 1
        assert 'writtenaset' in nco.columns
        assert len(nco.columns['writtenaset']) == nDevSamples

        assert nco.columns['writtenaset'].contains_remote_references is True
        remoteKeys = nco.columns['writtenaset'].remote_reference_keys
        assert tuple([str(idx) for idx in range(len(sampList))]) == remoteKeys
        for idx, _ in enumerate(sampList):
            sIdx = str(idx)
            assert sIdx in nco.columns['writtenaset']
            with pytest.raises(FileNotFoundError):
                shouldNotExist = nco.columns['writtenaset'][sIdx]
        nco.close()

    cloneBranchHist = list_history(newRepo._env.refenv, newRepo._env.branchenv, branch_name=f'origin/{branch.name}')
    assert cloneBranchHist == branchHist
    newRepo._env._close_environments()
Example No. 27
def branch_list(repo: Repository):
    """List all branch names.

    Includes both remote branches as well as local branches.
    """
    click.echo(repo.list_branches())
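As in the tests above, the command can be exercised with click's CliRunner. A minimal sketch; the ``from hangar.cli import cli`` import path is an assumption (the tests above elide their imports), and the current directory is assumed to hold an initialized repository.

from click.testing import CliRunner
from hangar import Repository
from hangar.cli import cli  # assumed import path; not shown in the tests above

repo = Repository('.', exists=True)
runner = CliRunner()
res = runner.invoke(cli.branch_list, obj=repo)
print(res.stdout)  # e.g. "['master']\n"
repo._env._close_environments()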
Example No. 28
    def test_server_push_two_branch_then_clone_fetch_data_options(
            self, two_branch_multi_commit_repo_class, managed_tmpdir_class, array5by7_class,
            fetchBranch, fetchCommit, fetchAsetns, fetchNbytes, fetchAll_history, tmp_path_factory):
        from hangar import Repository
        from operator import eq

        branch, branchHist, devCmts, masterHist, server_instance = two_branch_multi_commit_repo_class

        # Clone test (master branch)
        _new_tmpdir = tmp_path_factory.mktemp('newclone', numbered=True)
        new_tmpdir = str(_new_tmpdir)
        newRepo = Repository(path=new_tmpdir, exists=False)
        newRepo.clone('Test User', '*****@*****.**', server_instance, remove_old=True)
        newRepo.remote.fetch('origin', branch=branch.name)
        newRepo.create_branch('testbranch', base_commit=branchHist['head'])
        assert newRepo.list_branches() == ['master', 'origin/master', f'origin/{branch.name}', branch.name]

        # ------------------ format arguments depending on options -----------------

        kwargs = {
            'column_names': fetchAsetns,
            'max_num_bytes': fetchNbytes,
            'retrieve_all_history': fetchAll_history,
        }
        if fetchBranch is not None:
            func = branchHist if fetchBranch == 'testbranch' else masterHist
            kwargs['branch'] = fetchBranch
            kwargs['commit'] = None
        else:
            func = branchHist if fetchBranch == 'br' else masterHist
            kwargs['branch'] = None
            kwargs['commit'] = func['head']

        if fetchAll_history is True:
            commits_to_check = func['order']
        else:
            commits_to_check = [func['head']]

        # ----------------------- retrieve data with desired options --------------

        # This case should fail
        if (fetchAll_history is True) and isinstance(fetchNbytes, int):
            try:
                with pytest.raises(ValueError):
                    fetch_commits = newRepo.remote.fetch_data(remote='origin', **kwargs)
            finally:
                newRepo._env._close_environments()
            return True
        # get data
        fetch_commits = newRepo.remote.fetch_data(remote='origin', **kwargs)
        assert commits_to_check == fetch_commits

        # ------------- check that you got everything you expected ----------------

        for fCmt in fetch_commits:
            co = newRepo.checkout(commit=fCmt)
            assert co.commit_hash == fCmt

            # when we are checking one aset only
            if isinstance(fetchAsetns, tuple):
                d = co.columns[fetchAsetns[0]]
                # ensure we didn't fetch the other data simultaneously

                ds1SampList, ds2SampList, ds3SampList, ds4SampList = devCmts[fCmt]
                if fetchAsetns[0] == 'writtenaset':
                    compare = ds1SampList
                    cmp_func = np.allclose
                elif fetchAsetns[0] == '_two':
                    compare = ds2SampList
                    cmp_func = np.allclose
                elif fetchAsetns[0] == 'str_col':
                    compare = ds3SampList
                    cmp_func = eq
                else:
                    compare = ds4SampList
                    cmp_func = eq

                totalSeen = 0
                for idx, samp in enumerate(compare):
                    if fetchNbytes is None:
                        assert cmp_func(samp, d[str(idx)])
                    else:
                        try:
                            arr = d[str(idx)]
                            assert cmp_func(samp, arr)
                            try:
                                totalSeen += arr.nbytes
                            except AttributeError:
                                totalSeen += len(arr)
                        except FileNotFoundError:
                            pass
                        assert totalSeen <= fetchNbytes

            # compare both asets at the same time
            else:
                d = co.columns['writtenaset']
                dd = co.columns['_two']
                str_col = co.columns['str_col']
                bytes_col = co.columns['bytes_col']
                ds1List, ds2List, ds3List, ds4List = devCmts[fCmt]
                totalSeen = 0
                for idx, ds1ds2ds3ds4 in enumerate(zip(ds1List, ds2List, ds3List, ds4List)):
                    ds1, ds2, ds3, ds4 = ds1ds2ds3ds4
                    if fetchNbytes is None:
                        assert np.allclose(ds1, d[str(idx)])
                        assert np.allclose(ds2, dd[str(idx)])
                        assert ds3 == str_col[str(idx)]
                        assert ds4 == bytes_col[str(idx)]
                    else:
                        try:
                            arr1 = d[str(idx)]
                            assert np.allclose(ds1, arr1)
                            totalSeen += arr1.nbytes
                        except FileNotFoundError:
                            pass
                        try:
                            arr2 = dd[str(idx)]
                            assert np.allclose(ds2, arr2)
                            totalSeen += arr2.nbytes
                        except FileNotFoundError:
                            pass
                        try:
                            sval = str_col[str(idx)]
                            assert ds3 == sval
                            totalSeen += len(sval.encode())
                        except FileNotFoundError:
                            pass
                        try:
                            bval = bytes_col[str(idx)]
                            assert ds4 == bval
                            totalSeen += len(bval)
                        except FileNotFoundError:
                            pass
                        assert totalSeen <= fetchNbytes
            co.close()
        newRepo._env._close_environments()