def view_data(ctx, repo: Repository, column, sample, startpoint, format_, plugin):
    """Use a plugin to view the data of some SAMPLE in COLUMN at STARTPOINT.
    """
    from hangar import external
    from hangar.records.commiting import expand_short_commit_digest
    from hangar.records.heads import get_branch_head_commit, get_staging_branch_head

    # Extra (unknown) CLI options are forwarded verbatim to the plugin.
    plugin_kwargs = parse_custom_arguments(ctx.args)

    # Resolve STARTPOINT to a full commit: branch name -> its HEAD, any other
    # non-empty value -> short digest expansion, nothing -> staging branch HEAD.
    is_branch = startpoint in repo.list_branches()
    if is_branch:
        base_commit = get_branch_head_commit(repo._env.branchenv, startpoint)
    elif startpoint:
        base_commit = expand_short_commit_digest(repo._env.refenv, startpoint)
    else:
        staging_branch = get_staging_branch_head(repo._env.branchenv)
        base_commit = get_branch_head_commit(repo._env.branchenv, staging_branch)

    checkout = repo.checkout(commit=base_commit)
    try:
        col = checkout.columns.get(column)
        if format_:
            extension = format_.lstrip('.')
        else:
            extension = None
        payload = col[sample]
        try:
            external.show(payload, plugin=plugin, extension=extension, **plugin_kwargs)
        except Exception as err:
            # Any plugin failure is surfaced as a CLI error.
            raise click.ClickException(err)
    except KeyError as err:
        raise click.ClickException(err)
    finally:
        # The checkout is always released, whether or not viewing succeeded.
        checkout.close()
def diff(repo: Repository, dev, master):
    """Display diff of DEV commit/branch to MASTER commit/branch.

    If no MASTER is specified, then the staging area branch HEAD will be
    used as the commit digest for MASTER. This operation will return a diff
    which could be interpreted as if you were merging the changes in DEV into
    MASTER.

    TODO: VERIFY ORDER OF OUTPUT IS CORRECT.
    """
    from hangar.records.commiting import expand_short_commit_digest
    # Imported from ``records.heads`` like every other command in this file,
    # instead of relying on a re-export through ``records.commiting``.
    from hangar.records.heads import get_staging_branch_head
    from hangar.records.summarize import status

    # Anything that is not a known branch name is treated as a short digest.
    if dev not in repo.list_branches():
        dev = expand_short_commit_digest(repo._env.refenv, dev)

    if master is None:
        master = get_staging_branch_head(repo._env.branchenv)
    elif master not in repo.list_branches():
        master = expand_short_commit_digest(repo._env.refenv, master)

    diff_spec = repo.diff(master, dev)
    buf = status(hashenv=repo._env.hashenv, branch_name=dev, diff=diff_spec.diff)
    click.echo(buf.getvalue())
def branch_create(repo: Repository, name, startpoint):
    """Create a branch with NAME at STARTPOINT (short-digest or branch)

    If no STARTPOINT is provided, the new branch is positioned at the HEAD of
    the staging area branch, automatically.
    """
    from hangar.records.commiting import expand_short_commit_digest
    from hangar.records.heads import get_branch_head_commit
    from hangar.records.heads import get_staging_branch_head

    existing = repo.list_branches()
    if name in existing:
        e = ValueError(f'branch name: {name} already exists')
        raise click.ClickException(e)

    try:
        if startpoint is not None:
            if startpoint in existing:
                # STARTPOINT names a branch: start from that branch's HEAD.
                base_commit = get_branch_head_commit(repo._env.branchenv, startpoint)
            else:
                # Otherwise STARTPOINT is treated as a short commit digest.
                base_commit = expand_short_commit_digest(repo._env.refenv, startpoint)
        else:
            staging = get_staging_branch_head(repo._env.branchenv)
            base_commit = get_branch_head_commit(repo._env.branchenv, staging)
        created = repo.create_branch(name, base_commit=base_commit)
    except (KeyError, ValueError, RuntimeError) as err:
        # Report resolution / creation failures as a CLI error.
        raise click.ClickException(err)
    else:
        click.echo(f'Created BRANCH: {created.name} HEAD: {created.digest}')
def branch_create(ctx, name, startpoint):
    """Create a branch with NAME at STARTPOINT (short-digest or branch)

    If no STARTPOINT is provided, the new branch is positioned at the HEAD of
    the staging area branch, automatically.

    Raises ``ValueError`` if a branch called NAME already exists.
    """
    from hangar.records.heads import get_branch_head_commit, get_staging_branch_head

    # The repository is opened at the current working directory.
    P = os.getcwd()
    repo = Repository(path=P)
    branch_names = repo.list_branches()
    if name in branch_names:
        raise ValueError(f'branch name: {name} already exists')

    if startpoint is None:
        branch = get_staging_branch_head(repo._env.branchenv)
        base_commit = get_branch_head_commit(repo._env.branchenv, branch)
    elif startpoint in branch_names:
        base_commit = get_branch_head_commit(repo._env.branchenv, startpoint)
    else:
        # Not a branch name: treat STARTPOINT as a short commit digest.
        base_commit = expand_short_commit_digest(repo._env.refenv, startpoint)

    created = repo.create_branch(name, base_commit=base_commit)
    # A single f-string formats whatever ``create_branch`` returns; the
    # previous ``str +`` concatenation required the result to be a ``str``.
    click.echo(f'BRANCH: {created} HEAD: {base_commit}')
def fetch_data(repo: Repository, remote, startpoint, column, nbytes, all_):
    """Get data from REMOTE referenced by STARTPOINT (short-commit or branch).

    The default behavior is to only download a single commit's data or the HEAD
    commit of a branch. Please review optional arguments for other behaviors.
    """
    from hangar.records.commiting import expand_short_commit_digest
    from hangar.records.heads import get_branch_head_commit
    from hangar.records.heads import get_staging_branch_head
    from hangar.utils import parse_bytes

    if startpoint is not None:
        if startpoint in repo.list_branches():
            commit = get_branch_head_commit(repo._env.branchenv, startpoint)
        else:
            # Not a branch name: expand it as a short commit digest.
            commit = expand_short_commit_digest(repo._env.refenv, startpoint)
    else:
        staging = get_staging_branch_head(repo._env.branchenv)
        commit = get_branch_head_commit(repo._env.branchenv, staging)
    click.echo(f'Fetching data for commit: {commit}')

    try:
        max_nbytes = parse_bytes(nbytes)
    except AttributeError:
        # An AttributeError from parsing is taken to mean "no byte limit".
        max_nbytes = None

    # An empty selection of columns is passed on as ``None``.
    column = None if len(column) == 0 else column

    fetched = repo.remote.fetch_data(
        remote=remote,
        commit=commit,
        column_names=column,
        max_num_bytes=max_nbytes,
        retrieve_all_history=all_,
    )
    click.echo(f'completed data for commits: {fetched}')
def test_checkout_writer_branch_nonexistant_branch_errors(dummy_repo: Repository):
    """Checking out a missing branch exits with 1 and leaves the staging head on master."""
    from hangar.records.heads import get_staging_branch_head

    result = CliRunner().invoke(cli.checkout, ['doesnotexist'], obj=dummy_repo)

    expected_output = 'Error: branch with name: doesnotexist does not exist. cannot get head.\n'
    assert result.exit_code == 1
    assert result.stdout == expected_output

    # The failed checkout must not move the staging head or keep the lock.
    staging_head = get_staging_branch_head(dummy_repo._env.branchenv)
    assert staging_head == 'master'
    assert dummy_repo.writer_lock_held is False
def test_checkout_writer_branch_works(dummy_repo: Repository):
    """Checking out an existing branch succeeds and moves the staging head to it."""
    from hangar.records.heads import get_staging_branch_head

    dummy_repo.create_branch('dev')

    result = CliRunner().invoke(cli.checkout, ['dev'], obj=dummy_repo)
    assert result.exit_code == 0
    assert result.stdout == 'Writer checkout head set to branch: dev\n'

    # The staging head now points at 'dev' and the writer lock was released.
    staging_head = get_staging_branch_head(dummy_repo._env.branchenv)
    assert staging_head == 'dev'
    assert dummy_repo.writer_lock_held is False
def test_checkout_writer_branch_works(repo_20_filled_samples2):
    """Checking out an existing branch succeeds and moves the staging head to it."""
    from hangar.records.heads import get_staging_branch_head

    repo = repo_20_filled_samples2
    repo.create_branch('dev')

    result = CliRunner().invoke(cli.checkout, ['dev'], obj=repo)
    assert result.exit_code == 0
    assert result.stdout == 'Writer checkout head set to branch: dev\n'

    # The staging head now points at 'dev' and the writer lock was released.
    staging_head = get_staging_branch_head(repo._env.branchenv)
    assert staging_head == 'dev'
    assert repo.writer_lock_held is False
def export_data(ctx, repo: Repository, column, outdir, startpoint, sample, format_, plugin):
    """Export COLUMN sample data as it existed at STARTPOINT to some format and path.

    Specifying which sample to be exported is possible by using the switch
    ``--sample`` (without this, all the samples in the given column will be
    exported). Since hangar supports both int and str datatype for the sample
    name, specifying that while mentioning the sample name might be necessary
    at times. It is possible to do that by separating the name and type by a
    colon.

    Example:

       1. if the sample name is string of numeric 10 - ``str:10`` or ``10``
       2. if the sample name is ``sample1`` - ``str:sample1`` or ``sample1``
       3. if the sample name is an int, let say 10 - ``int:10``
    """
    from hangar import external
    from hangar.records.commiting import expand_short_commit_digest
    from hangar.records.heads import get_branch_head_commit, get_staging_branch_head

    # Extra (unknown) CLI options are forwarded verbatim to the plugin.
    plugin_kwargs = parse_custom_arguments(ctx.args)

    # Branch name -> its HEAD; other non-empty value -> short digest;
    # nothing supplied -> HEAD of the staging branch.
    is_branch = startpoint in repo.list_branches()
    if is_branch:
        base_commit = get_branch_head_commit(repo._env.branchenv, startpoint)
    elif startpoint:
        base_commit = expand_short_commit_digest(repo._env.refenv, startpoint)
    else:
        staging_branch = get_staging_branch_head(repo._env.branchenv)
        base_commit = get_branch_head_commit(repo._env.branchenv, staging_branch)

    checkout = repo.checkout(commit=base_commit)
    try:
        col = checkout.columns.get(column)
        if sample is not None:
            names = [sample]
        else:
            names = list(col.keys())
        extension = format_.lstrip('.') if format_ else None

        with col, click.progressbar(names) as bar:
            for key in bar:
                payload = col[key]
                # Output name carries the key's type, e.g. ``int:10``.
                typed_name = f'{type(key).__name__}:{key}'
                try:
                    external.save(
                        payload, outdir, typed_name, extension, plugin, **plugin_kwargs)
                except Exception as err:
                    raise click.ClickException(err)
    except KeyError as err:
        raise click.ClickException(err)
    finally:
        # The checkout is always released, whether or not the export succeeded.
        checkout.close()
def import_data(ctx, repo: Repository, column, path, branch, plugin, overwrite):
    """Import file or directory of files at PATH to COLUMN in the staging area.

    If passing in a directory, all files in the directory will be imported, if
    passing in a file, just that files specified will be imported
    """
    # TODO: ignore warning through env variable
    from types import GeneratorType
    from hangar import external
    from hangar.records.heads import get_staging_branch_head

    # Extra (unknown) CLI options are forwarded verbatim to the plugin.
    plugin_kwargs = parse_custom_arguments(ctx.args)

    if branch is None:
        branch = get_staging_branch_head(repo._env.branchenv)
    elif branch not in repo.list_branches():
        raise click.ClickException(
            f'Branch name: {branch} does not exist, Exiting.')
    click.echo(f'Writing to branch: {branch}')

    checkout = repo.checkout(write=True, branch=branch)
    try:
        target = checkout.columns.get(column)
        source = Path(path)
        if source.is_dir():
            files = [entry.resolve() for entry in source.iterdir()]
        else:
            files = [source.resolve()]

        with target as aset, click.progressbar(files) as bar:
            for fpath in bar:
                # multi-suffix files (tar.bz2)
                ext = ''.join(fpath.suffixes).strip('.')
                loaded = external.load(
                    fpath, plugin=plugin, extension=ext, **plugin_kwargs)
                # A non-generator result is wrapped so both cases iterate alike.
                if not isinstance(loaded, GeneratorType):
                    loaded = [loaded]
                for arr, fname in loaded:
                    if overwrite or (fname not in aset):
                        try:
                            aset[fname] = arr
                        except ValueError as err:
                            # A rejected sample is reported; the import continues.
                            click.echo(err)
    except (ValueError, KeyError) as err:
        raise click.ClickException(err)
    finally:
        # The write checkout is always released.
        checkout.close()
def test_staging_head_branch_name_exists(self, diverse_repo):
    """Deleting the staging branch's head record makes the branch integrity check raise."""
    from hangar.diagnostics.integrity import _verify_branch_integrity
    from hangar.records.heads import get_staging_branch_head
    from hangar.records.parsing import repo_branch_head_db_key_from_raw_key

    env = diverse_repo._env
    staging_branch = get_staging_branch_head(env.branchenv)

    # Remove the head record of the branch the staging area refers to.
    with env.branchenv.begin(write=True) as txn:
        head_key = repo_branch_head_db_key_from_raw_key(staging_branch)
        txn.delete(head_key)

    expected_msg = 'Brach commit map compromised. Staging head refers to branch name'
    with pytest.raises(RuntimeError, match=expected_msg):
        _verify_branch_integrity(env.branchenv, env.refenv)
def test_checkout_writer_branch_lock_held_errors(dummy_repo: Repository):
    """CLI checkout fails with exit code 1 while another write checkout holds the lock."""
    from hangar.records.heads import get_staging_branch_head

    dummy_repo.create_branch('testbranch')
    # Hold the writer lock for the duration of the CLI invocation.
    writer = dummy_repo.checkout(write=True, branch='master')
    try:
        result = CliRunner().invoke(cli.checkout, ['testbranch'], obj=dummy_repo)
        assert result.exit_code == 1
        output = result.stdout
        assert output.startswith('Error: Cannot acquire the writer lock.') is True

        # Nothing moved: staging head, lock and the open checkout are unchanged.
        staging_head = get_staging_branch_head(dummy_repo._env.branchenv)
        assert staging_head == 'master'
        assert dummy_repo.writer_lock_held is True
        assert writer.branch_name == 'master'
    finally:
        writer.close()
    assert dummy_repo.writer_lock_held is False
def import_data(repo: Repository, arrayset, path, branch, plugin, overwrite):
    """Import file(s) at PATH to ARRAYSET in the staging area.

    PATH may be a single file or a directory; for a directory every entry
    returned by ``os.listdir`` is read. Samples are stored under the file's
    base name. Unless ``overwrite`` is set, names already present in the
    arrayset are skipped. If ``branch`` is given it must be an existing branch,
    otherwise the staging area branch is used.
    """
    from hangar.cli.io import imread
    from hangar.records.heads import get_staging_branch_head

    # Resolve the branch before any checkout exists, so bailing out here
    # never reaches the cleanup code below.
    if branch is None:
        branch_name = get_staging_branch_head(repo._env.branchenv)
    elif branch in repo.list_branches():
        branch_name = branch
    else:
        click.echo(f'Branch name: {branch} does not exist, Exiting.')
        return None
    click.echo(f'Writing to branch: {branch_name}')

    # ``co`` stays None until the checkout succeeds; ``finally`` must not
    # call ``close()`` on a name that was never bound.
    co = None
    try:
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", UserWarning)
            co = repo.checkout(write=True, branch=branch_name)
            aset = co.arraysets.get(arrayset)

        if os.path.isfile(path):
            fname = os.path.basename(path)
            if (not overwrite) and (fname in aset):
                click.echo(f'skipping existing name: {fname} as overwrite flag not set')
                return None
            fNamePth = [(fname, path)]
        else:
            fnames = os.listdir(path)
            if not overwrite:
                fnames = [fname for fname in fnames if fname not in aset]
            fNamePth = [(fname, os.path.join(path, fname)) for fname in fnames]

        with aset as a, click.progressbar(fNamePth) as fnamesBar:
            for fn, fpth in fnamesBar:
                arr = imread(fpth, plugin=plugin)
                try:
                    a[fn] = arr
                except ValueError as e:
                    # A rejected sample is reported; the import continues.
                    click.echo(e)
    finally:
        if co is not None:
            co.close()
def fetch_data(ctx, remote, startpoint, aset, nbytes, all_):
    """Get data from REMOTE referenced by STARTPOINT (short-commit or branch).

    The default behavior is to only download a single commit's data or the HEAD
    commit of a branch. Please review optional arguments for other behaviors
    """
    from hangar.records.heads import get_branch_head_commit, get_staging_branch_head
    from hangar.utils import parse_bytes

    # The repository is opened at the current working directory.
    repo = Repository(path=os.getcwd())

    if startpoint is not None:
        if startpoint in repo.list_branches():
            commit = get_branch_head_commit(repo._env.branchenv, startpoint)
            click.echo(
                f'Fetching data for HEAD: {commit} of STARTPOINT BRANCH: {startpoint}'
            )
        else:
            # Not a branch name: expand it as a short commit digest.
            commit = expand_short_commit_digest(repo._env.refenv, startpoint)
            click.echo(f'Fetching data for STARTPOINT HEAD: {commit}')
    else:
        branch = get_staging_branch_head(repo._env.branchenv)
        commit = get_branch_head_commit(repo._env.branchenv, branch)
        click.echo(
            f'No startpoint supplied, fetching data of HEAD: {commit} for BRANCH: {branch}'
        )

    click.echo(f'aset argument: {aset}')
    try:
        max_nbytes = parse_bytes(nbytes)
        click.echo(f'nbytes argument: {max_nbytes}')
    except AttributeError:
        # An AttributeError here is taken to mean "no byte limit".
        max_nbytes = None

    # An empty selection of arraysets is passed on as ``None``.
    aset = None if len(aset) == 0 else aset

    fetched = repo.remote.fetch_data(
        remote=remote,
        commit=commit,
        arrayset_names=aset,
        max_num_bytes=max_nbytes,
        retrieve_all_history=all_,
    )
    click.echo(f'completed data for commits: {fetched}')