def test_push_fetch_records(server_instance, backend):
    """Push 'master' and 'testbranch' to a server through the CLI (legacy API).

    Builds one commit per branch using the old ``arraysets`` / ``metadata``
    checkout API and asserts that ``cli.push`` exits cleanly for both refs.

    Parameters
    ----------
    server_instance : fixture
        address of a running hangar server to push to (TODO confirm fixture type)
    backend : fixture
        backend option string forwarded to ``init_arrayset``
    """
    runner = CliRunner()
    with runner.isolated_filesystem():
        repo = Repository(getcwd(), exists=False)
        try:
            repo.init('foo', 'bar')
            dummyData = np.arange(50)

            # first commit on master: ten samples plus two metadata keys
            co1 = repo.checkout(write=True, branch='master')
            co1.arraysets.init_arrayset(
                name='dummy', prototype=dummyData, named_samples=True, backend_opts=backend)
            for idx in range(10):
                dummyData[:] = idx
                co1.arraysets['dummy'][str(idx)] = dummyData
            co1.metadata['hello'] = 'world'
            co1.metadata['somemetadatakey'] = 'somemetadatavalue'
            cmt1 = co1.commit('first commit adding dummy data and hello meta')
            co1.close()

            # second commit on a new branch with non-conflicting data / meta
            repo.create_branch('testbranch')
            co2 = repo.checkout(write=True, branch='testbranch')
            for idx in range(10, 20):
                dummyData[:] = idx
                co2.arraysets['dummy'][str(idx)] = dummyData
            co2.metadata['foo'] = 'bar'
            cmt2 = co2.commit('first commit on test branch adding non-conflict data and meta')
            co2.close()

            repo.remote.add('origin', server_instance)
            res = runner.invoke(cli.push, ['origin', 'master'], obj=repo)
            assert res.exit_code == 0
            res = runner.invoke(cli.push, ['origin', 'testbranch'], obj=repo)
            assert res.exit_code == 0
        finally:
            # FIX: always release the lmdb environment handles, matching the
            # sibling test's try/finally; without this a failing assertion
            # leaks open environments into the isolated tmp filesystem.
            repo._env._close_environments()
def test_push_fetch_records(server_instance, backend):
    """Verify ``hangar push`` succeeds for two locally created branches.

    One commit is written on 'master' and one on 'testbranch' (columns API),
    then each branch is pushed to the server fixture via the CLI.
    """
    runner = CliRunner()
    with runner.isolated_filesystem():
        repo = Repository(getcwd(), exists=False)
        try:
            repo.init('foo', 'bar')
            sample = np.arange(50)

            # first commit on master: ten samples in a fresh ndarray column
            master_co = repo.checkout(write=True, branch='master')
            master_co.add_ndarray_column(name='dummy', prototype=sample, backend=backend)
            for key in range(10):
                sample[:] = key
                master_co.columns['dummy'][str(key)] = sample
            cmt1 = master_co.commit('first commit adding dummy data')
            master_co.close()

            # second commit on a dev branch: ten more, non-conflicting, samples
            repo.create_branch('testbranch')
            dev_co = repo.checkout(write=True, branch='testbranch')
            for key in range(10, 20):
                sample[:] = key
                dev_co.columns['dummy'][str(key)] = sample
            cmt2 = dev_co.commit('first commit on test branch adding non-conflict data')
            dev_co.close()

            # push each branch through the CLI; both must exit cleanly
            repo.remote.add('origin', server_instance)
            for branch_name in ('master', 'testbranch'):
                res = runner.invoke(cli.push, ['origin', branch_name], obj=repo)
                assert res.exit_code == 0
        finally:
            repo._env._close_environments()
def test_branch_create_and_delete(written_two_cmt_server_repo):
    """Exercise CLI branch create / remove / list against a cloned repo.

    Clones a server-side repo with two commits, then checks:
      * ``branch_create`` echoes the new branch + HEAD digest
      * ``branch_remove`` deletes an un-diverged branch
      * removing a branch with unmerged work fails without ``-f`` (exit 1)
        and succeeds with ``-f``
      * ``branch_list`` prints the remaining branch names
    """
    server, base_repo = written_two_cmt_server_repo
    # capture the current HEAD digest so the echoed CLI output can be matched
    co = base_repo.checkout(write=True)
    cmt = co.commit_hash
    co.close()

    runner = CliRunner()
    with runner.isolated_filesystem():
        P = getcwd()
        new_repo = Repository(P, exists=False)
        try:
            res = runner.invoke(cli.clone, [
                '--name', 'Foo Tester', '--email', '*****@*****.**', f'{server}'
            ], obj=new_repo)
            assert res.exit_code == 0

            # create a branch at the clone's HEAD; stdout pins the exact format
            res = runner.invoke(cli.branch_create, ['testbranch'], obj=new_repo)
            assert res.exit_code == 0
            assert res.stdout == f"Created BRANCH: testbranch HEAD: {cmt}\n"
            branches = new_repo.list_branches()
            assert branches == ['master', 'origin/master', 'testbranch']

            # a branch with no unmerged work can be removed without force
            res = runner.invoke(cli.branch_remove, ['testbranch'], obj=new_repo)
            assert res.exit_code == 0
            assert res.stdout == f"Deleted BRANCH: testbranch HEAD: {cmt}\n"
            branches = new_repo.list_branches()
            assert branches == ['master', 'origin/master']

            # make a branch with a commit not reachable from master
            new_repo.create_branch('secondtest')
            co = new_repo.checkout(write=True, branch='secondtest')
            co.add_str_column('test_meta')
            newDigest = co.commit('dummy commit')
            co.close()
            # re-open with staging set to master so we can try to delete secondtest
            co = new_repo.checkout(write=True, branch='master')
            co.close()
            # un-force removal of the diverged branch must fail ...
            res = runner.invoke(cli.branch_remove, ['secondtest'], obj=new_repo)
            assert res.exit_code == 1
            # ... but '-f' (force) succeeds
            res = runner.invoke(cli.branch_remove, ['secondtest', '-f'], obj=new_repo)
            assert res.exit_code == 0
            assert res.stdout == f"Deleted BRANCH: secondtest HEAD: {newDigest}\n"

            res = runner.invoke(cli.branch_list, obj=new_repo)
            assert res.exit_code == 0
            assert res.stdout == "['master', 'origin/master']\n"
        finally:
            # release lmdb handles so the isolated filesystem can be torn down
            new_repo._env._close_environments()
def test_checkout_writer_branch_works(dummy_repo: Repository):
    """``hangar checkout <branch>`` moves the staging head to that branch."""
    from hangar.records.heads import get_staging_branch_head

    dummy_repo.create_branch('dev')
    invocation = CliRunner().invoke(cli.checkout, ['dev'], obj=dummy_repo)
    assert invocation.exit_code == 0
    assert invocation.stdout == 'Writer checkout head set to branch: dev\n'
    # the staging head record must now point at 'dev' ...
    staging_head = get_staging_branch_head(dummy_repo._env.branchenv)
    assert staging_head == 'dev'
    # ... and the CLI must have released the writer lock before exiting
    assert dummy_repo.writer_lock_held is False
def test_cannot_operate_without_repo_init(managed_tmpdir):
    """Every repo operation must raise RuntimeError before ``init`` is run."""
    repo = Repository(path=managed_tmpdir, exists=False)

    # Each entry is a zero-argument thunk. For the property-style attributes
    # (path, version, writer_lock_held, size_*) the RuntimeError fires on
    # attribute access, which happens inside the lambda body just the same.
    failing_ops = (
        lambda: repo.writer_lock_held(),
        lambda: repo.checkout(),
        lambda: repo.writer_lock_held(),
        lambda: repo.log(),
        lambda: repo.summary(),
        lambda: repo.merge('fail', 'master', 'nonexistant'),
        lambda: repo.create_branch('test'),
        lambda: repo.list_branches(),
        lambda: repo.force_release_writer_lock(),
        lambda: repo.remote.add('origin', 'foo'),
        lambda: repo.remote.remove('origin'),
        lambda: repo.remote.fetch('origin', 'master'),
        lambda: repo.remote.fetch_data('origin', branch='master'),
        lambda: repo.remote.list_all(),
        lambda: repo.remote.ping('origin'),
        lambda: repo.remote.push('origin', 'master'),
        lambda: repo.remove_branch('master'),
        lambda: repo.path,
        lambda: repo.version,
        lambda: repo.writer_lock_held,
        lambda: repo.size_human,
        lambda: repo.size_nbytes,
    )
    for attempt in failing_ops:
        with pytest.raises(RuntimeError):
            attempt()

    assert repo._env.repo_is_initialized is False
def branch_create(repo: Repository, name, startpoint):
    """Create a branch with NAME at STARTPOINT (short-digest or branch)

    If no STARTPOINT is provided, the new branch is positioned at the HEAD of
    the staging area branch, automatically.
    """
    from hangar.records.commiting import expand_short_commit_digest
    from hangar.records.heads import get_branch_head_commit, get_staging_branch_head

    existing_names = repo.list_branches()
    # refuse duplicate names up front so the failure surfaces as a clean
    # click error message rather than a traceback
    if name in existing_names:
        err = ValueError(f'branch name: {name} already exists')
        raise click.ClickException(err)

    try:
        # resolve the base commit: staging head -> named branch head -> short digest
        if startpoint is None:
            staging_branch = get_staging_branch_head(repo._env.branchenv)
            base_commit = get_branch_head_commit(repo._env.branchenv, staging_branch)
        elif startpoint in existing_names:
            base_commit = get_branch_head_commit(repo._env.branchenv, startpoint)
        else:
            base_commit = expand_short_commit_digest(repo._env.refenv, startpoint)
        created = repo.create_branch(name, base_commit=base_commit)
    except (KeyError, ValueError, RuntimeError) as err:
        # normalize any lookup/validation failure into a CLI-level error
        raise click.ClickException(err)
    click.echo(f'Created BRANCH: {created.name} HEAD: {created.digest}')
def branch_create(ctx, name, startpoint):
    """Create a branch with NAME at STARTPOINT (short-digest or branch)

    If no STARTPOINT is provided, the new branch is positioned at the HEAD of
    the staging area branch, automatically.
    """
    # FIX: expand_short_commit_digest was referenced below but never imported,
    # so the short-digest STARTPOINT path raised NameError. Import it from the
    # same module the sibling command uses.
    from hangar.records.commiting import expand_short_commit_digest
    from hangar.records.heads import get_branch_head_commit, get_staging_branch_head

    P = os.getcwd()
    repo = Repository(path=P)
    branch_names = repo.list_branches()
    if name in branch_names:
        raise ValueError(f'branch name: {name} already exists')

    # resolve the base commit: staging head -> named branch head -> short digest
    if startpoint is None:
        branch = get_staging_branch_head(repo._env.branchenv)
        base_commit = get_branch_head_commit(repo._env.branchenv, branch)
    elif startpoint in branch_names:
        base_commit = get_branch_head_commit(repo._env.branchenv, startpoint)
    else:
        base_commit = expand_short_commit_digest(repo._env.refenv, startpoint)

    # NOTE(review): the concatenation assumes create_branch returns the branch
    # name as a str here (older API) — confirm against the Repository version
    click.echo(f'BRANCH: ' + repo.create_branch(name, base_commit=base_commit) + f' HEAD: {base_commit}')
def test_checkout_writer_branch_lock_held_errors(dummy_repo: Repository):
    """CLI checkout must fail (exit 1) while another writer holds the lock."""
    from hangar.records.heads import get_staging_branch_head

    dummy_repo.create_branch('testbranch')
    # hold the writer lock via an open write-enabled checkout on master
    writer_co = dummy_repo.checkout(write=True, branch='master')
    try:
        result = CliRunner().invoke(cli.checkout, ['testbranch'], obj=dummy_repo)
        assert result.exit_code == 1
        assert result.stdout.startswith('Error: Cannot acquire the writer lock.') is True
        # staging head must be untouched and the original writer still valid
        staging_head = get_staging_branch_head(dummy_repo._env.branchenv)
        assert staging_head == 'master'
        assert dummy_repo.writer_lock_held is True
        assert writer_co.branch_name == 'master'
    finally:
        writer_co.close()
    assert dummy_repo.writer_lock_held is False
def branch(l, b):
    """List branch names and/or create a new branch, depending on CLI flags."""
    if l:
        # list flag: echo every branch name known to the repository at cwd
        repo = Repository(path=os.getcwd())
        click.echo(repo.list_branch_names())
    if b:
        # create flag: make a branch off the current HEAD and report success
        repo = Repository(path=os.getcwd())
        outcome = repo.create_branch(b)
        click.echo(f'create branch operation success: {outcome}')
def test_server_push_two_branch_then_clone_fetch_data_options(
        self, two_branch_multi_commit_repo_class, managed_tmpdir_class,
        array5by7_class, fetchBranch, fetchCommit, fetchAsetns, fetchNbytes,
        fetchAll_history, tmp_path_factory):
    """Clone a two-branch server repo and verify ``fetch_data`` option combos.

    Presumably parametrized (class-level fixtures) over branch vs. commit
    selection, column-name subsets, a max-byte budget, and full-history
    retrieval. The combination of ``retrieve_all_history=True`` with a byte
    limit is expected to raise ValueError; every other combination fetches
    data which is then compared sample-by-sample against the values recorded
    at push time (``devCmts``).
    """
    from hangar import Repository
    from operator import eq

    branch, branchHist, devCmts, masterHist, server_instance = two_branch_multi_commit_repo_class

    # Clone test (master branch)
    _new_tmpdir = tmp_path_factory.mktemp('newclone', numbered=True)
    new_tmpdir = str(_new_tmpdir)
    newRepo = Repository(path=new_tmpdir, exists=False)
    newRepo.clone('Test User', '*****@*****.**', server_instance, remove_old=True)
    newRepo.remote.fetch('origin', branch=branch.name)
    newRepo.create_branch('testbranch', base_commit=branchHist['head'])
    assert newRepo.list_branches() == ['master', 'origin/master', f'origin/{branch.name}', branch.name]

    # ------------------ format arguments depending on options -----------------

    kwargs = {
        'column_names': fetchAsetns,
        'max_num_bytes': fetchNbytes,
        'retrieve_all_history': fetchAll_history,
    }
    if fetchBranch is not None:
        # branch-based fetch: pick the matching recorded history for checking
        func = branchHist if fetchBranch == 'testbranch' else masterHist
        kwargs['branch'] = fetchBranch
        kwargs['commit'] = None
    else:
        # commit-based fetch; NOTE(review): 'br' never equals None-branch here,
        # so this always selects masterHist — confirm intent of the comparison
        func = branchHist if fetchBranch == 'br' else masterHist
        kwargs['branch'] = None
        kwargs['commit'] = func['head']

    if fetchAll_history is True:
        commits_to_check = func['order']
    else:
        commits_to_check = [func['head']]

    # ----------------------- retrieve data with desired options --------------

    # This case should fail: a byte budget cannot be combined with full history
    if (fetchAll_history is True) and isinstance(fetchNbytes, int):
        try:
            with pytest.raises(ValueError):
                fetch_commits = newRepo.remote.fetch_data(remote='origin', **kwargs)
        finally:
            newRepo._env._close_environments()
        return True
    # get data
    fetch_commits = newRepo.remote.fetch_data(remote='origin', **kwargs)
    assert commits_to_check == fetch_commits

    # ------------- check that you got everything you expected ----------------

    for fCmt in fetch_commits:
        co = newRepo.checkout(commit=fCmt)
        assert co.commit_hash == fCmt

        # when we are checking one aset only
        if isinstance(fetchAsetns, tuple):
            d = co.columns[fetchAsetns[0]]
            # ensure we didn't fetch the other data simultaneously
            ds1SampList, ds2SampList, ds3SampList, ds4SampList = devCmts[fCmt]
            # pick the reference sample list and an appropriate comparator:
            # numeric columns use np.allclose, str/bytes columns use equality
            if fetchAsetns[0] == 'writtenaset':
                compare = ds1SampList
                cmp_func = np.allclose
            elif fetchAsetns[0] == '_two':
                compare = ds2SampList
                cmp_func = np.allclose
            elif fetchAsetns[0] == 'str_col':
                compare = ds3SampList
                cmp_func = eq
            else:
                compare = ds4SampList
                cmp_func = eq

            totalSeen = 0
            for idx, samp in enumerate(compare):
                if fetchNbytes is None:
                    assert cmp_func(samp, d[str(idx)])
                else:
                    # with a byte budget some samples may not have been
                    # fetched locally; those raise FileNotFoundError
                    try:
                        arr = d[str(idx)]
                        assert cmp_func(samp, arr)
                        try:
                            # ndarray payloads report nbytes; str/bytes use len
                            totalSeen += arr.nbytes
                        except AttributeError:
                            totalSeen += len(arr)
                    except FileNotFoundError:
                        pass
                    assert totalSeen <= fetchNbytes

        # compare both asets at the same time
        else:
            d = co.columns['writtenaset']
            dd = co.columns['_two']
            str_col = co.columns['str_col']
            bytes_col = co.columns['bytes_col']
            ds1List, ds2List, ds3List, ds4List = devCmts[fCmt]

            totalSeen = 0
            for idx, ds1ds2ds3ds4 in enumerate(zip(ds1List, ds2List, ds3List, ds4List)):
                ds1, ds2, ds3, ds4 = ds1ds2ds3ds4
                if fetchNbytes is None:
                    assert np.allclose(ds1, d[str(idx)])
                    assert np.allclose(ds2, dd[str(idx)])
                    assert ds3 == str_col[str(idx)]
                    assert ds4 == bytes_col[str(idx)]
                else:
                    # each column is checked independently: missing (unfetched)
                    # samples raise FileNotFoundError and are skipped
                    try:
                        arr1 = d[str(idx)]
                        assert np.allclose(ds1, arr1)
                        totalSeen += arr1.nbytes
                    except FileNotFoundError:
                        pass
                    try:
                        arr2 = dd[str(idx)]
                        assert np.allclose(ds2, arr2)
                        totalSeen += arr2.nbytes
                    except FileNotFoundError:
                        pass
                    try:
                        sval = str_col[str(idx)]
                        assert ds3 == sval
                        totalSeen += len(sval.encode())
                    except FileNotFoundError:
                        pass
                    try:
                        bval = bytes_col[str(idx)]
                        assert ds4 == bval
                        totalSeen += len(bval)
                    except FileNotFoundError:
                        pass
                    assert totalSeen <= fetchNbytes
        co.close()
    newRepo._env._close_environments()
def test_server_push_two_branch_then_clone_fetch_data_options(
        server_instance, repo, managed_tmpdir, array5by7, nMasterCommits,
        nMasterSamples, nDevCommits, nDevSamples, fetchBranch, fetchCommit,
        fetchAsetns, fetchNbytes, fetchAll_history):
    """Push master + dev branches, clone, and verify ``fetch_data`` options.

    Setup pushes ``nMasterCommits`` / ``nDevCommits`` commits of random array
    samples (legacy ``arraysets`` API) to the server fixture, recording every
    sample per commit. A fresh clone then calls ``fetch_data`` with the
    parametrized branch/commit selector, arrayset subset, byte budget, and
    history flag; the ``retrieve_all_history`` + byte-limit combination must
    raise ValueError, and all other combinations are checked sample-by-sample.
    """
    from hangar import Repository
    from hangar.records.summarize import list_history

    # Push master branch test
    masterCmts = {}
    co = repo.checkout(write=True)
    co.arraysets.init_arrayset(name='writtenaset', shape=(5, 7), dtype=np.float32)
    # NOTE(review): shape=(20) is the int 20, not a 1-tuple — presumably the
    # arrayset API accepts int shapes; confirm
    co.arraysets.init_arrayset(name='_two', shape=(20), dtype=np.float32)
    for cIdx in range(nMasterCommits):
        if cIdx != 0:
            # the first iteration reuses the checkout opened above
            co = repo.checkout(write=True)
        masterSampList1 = []
        masterSampList2 = []
        with co.arraysets['writtenaset'] as d, co.arraysets['_two'] as dd:
            # drop all but the first sample from the previous commit so each
            # commit holds a fresh set of samples
            for prevKey in list(d.keys())[1:]:
                d.remove(prevKey)
                dd.remove(prevKey)
            for sIdx in range(nMasterSamples):
                arr1 = np.random.randn(*array5by7.shape).astype(
                    np.float32) * 100
                d[str(sIdx)] = arr1
                masterSampList1.append(arr1)
                arr2 = np.random.randn(20).astype(np.float32)
                dd[str(sIdx)] = arr2
                masterSampList2.append(arr2)
        cmt = co.commit(f'master commit number: {cIdx}')
        masterCmts[cmt] = (masterSampList1, masterSampList2)
        co.close()

    repo.remote.add('origin', server_instance)
    push1 = repo.remote.push('origin', 'master')
    assert push1 == 'master'
    masterHist = list_history(repo._env.refenv, repo._env.branchenv, branch_name='master')

    # Push dev branch test
    devCmts = masterCmts.copy()
    branch = repo.create_branch('testbranch')
    for cIdx in range(nDevCommits):
        co = repo.checkout(write=True, branch=branch.name)
        devSampList1 = []
        devSampList2 = []
        with co.arraysets['writtenaset'] as d, co.arraysets['_two'] as dd:
            for prevKey in list(d.keys())[1:]:
                d.remove(prevKey)
                dd.remove(prevKey)
            for sIdx in range(nDevSamples):
                arr1 = np.random.randn(*array5by7.shape).astype(
                    np.float32) * 100
                d[str(sIdx)] = arr1
                devSampList1.append(arr1)
                arr2 = np.random.randn(20).astype(np.float32)
                dd[str(sIdx)] = arr2
                devSampList2.append(arr2)
        cmt = co.commit(f'dev commit number: {cIdx}')
        devCmts[cmt] = (devSampList1, devSampList2)
        co.close()

    push2 = repo.remote.push('origin', branch.name)
    assert push2 == branch.name
    branchHist = list_history(repo._env.refenv, repo._env.branchenv, branch_name=branch.name)

    # -------------------------- end setup ------------------------------------

    # Clone test (master branch)
    new_tmpdir = pjoin(managed_tmpdir, 'new')
    mkdir(new_tmpdir)
    newRepo = Repository(path=new_tmpdir, exists=False)
    newRepo.clone('Test User', '*****@*****.**', server_instance, remove_old=True)
    newRepo.remote.fetch('origin', branch=branch.name)
    newRepo.create_branch('testbranch', base_commit=branchHist['head'])
    assert newRepo.list_branches() == [
        'master', 'origin/master', f'origin/{branch.name}', branch.name
    ]

    # ------------------ format arguments depending on options -----------------

    kwargs = {
        'arrayset_names': fetchAsetns,
        'max_num_bytes': fetchNbytes,
        'retrieve_all_history': fetchAll_history,
    }
    if fetchBranch is not None:
        # branch-based fetch: pick the matching recorded history for checking
        func = branchHist if fetchBranch == 'testbranch' else masterHist
        kwargs['branch'] = fetchBranch
        kwargs['commit'] = None
    else:
        # commit-based fetch; NOTE(review): 'br' never equals None-branch here,
        # so this always selects masterHist — confirm intent of the comparison
        func = branchHist if fetchBranch == 'br' else masterHist
        kwargs['branch'] = None
        kwargs['commit'] = func['head']

    if fetchAll_history is True:
        commits_to_check = func['order']
    else:
        commits_to_check = [func['head']]

    # ----------------------- retrieve data with desired options --------------

    # This case should fail: a byte budget cannot be combined with full history
    if (fetchAll_history is True) and isinstance(fetchNbytes, int):
        try:
            with pytest.raises(ValueError):
                fetch_commits = newRepo.remote.fetch_data(remote='origin', **kwargs)
        finally:
            newRepo._env._close_environments()
        return True
    # get data
    fetch_commits = newRepo.remote.fetch_data(remote='origin', **kwargs)
    assert commits_to_check == fetch_commits

    # ------------- check that you got everything you expected ----------------

    for fCmt in fetch_commits:
        co = newRepo.checkout(commit=fCmt)
        assert co.commit_hash == fCmt

        # when we are checking one aset only
        if isinstance(fetchAsetns, tuple):
            d = co.arraysets[fetchAsetns[0]]
            # ensure we didn't fetch the other data simultaneously
            ds1SampList, ds2SampList = devCmts[fCmt]
            if fetchAsetns[0] == 'writtenaset':
                compare = ds1SampList
            else:
                compare = ds2SampList

            totalSeen = 0
            for idx, samp in enumerate(compare):
                if fetchNbytes is None:
                    assert np.allclose(samp, d[str(idx)])
                else:
                    # with a byte budget some samples may not have been
                    # fetched locally; those raise FileNotFoundError
                    try:
                        arr = d[str(idx)]
                        assert np.allclose(samp, arr)
                        totalSeen += arr.nbytes
                    except FileNotFoundError:
                        pass
                    assert totalSeen <= fetchNbytes

        # compare both asets at the same time
        else:
            d = co.arraysets['writtenaset']
            dd = co.arraysets['_two']
            ds1List, ds2List = devCmts[fCmt]

            totalSeen = 0
            for idx, ds1ds2 in enumerate(zip(ds1List, ds2List)):
                ds1, ds2 = ds1ds2
                if fetchNbytes is None:
                    assert np.allclose(ds1, d[str(idx)])
                    assert np.allclose(ds2, dd[str(idx)])
                else:
                    # each arrayset is checked independently: missing
                    # (unfetched) samples raise FileNotFoundError, skipped
                    try:
                        arr1 = d[str(idx)]
                        assert np.allclose(ds1, arr1)
                        totalSeen += arr1.nbytes
                    except FileNotFoundError:
                        pass
                    try:
                        arr2 = dd[str(idx)]
                        assert np.allclose(ds2, arr2)
                        totalSeen += arr2.nbytes
                    except FileNotFoundError:
                        pass
                    assert totalSeen <= fetchNbytes
        co.close()
    newRepo._env._close_environments()