def test_push_and_clone_master_linear_history_multiple_commits(
        server_instance, repo, managed_tmpdir, array5by7, nCommits, nSamples):
    """Push a linear master history, clone it, and verify records round-trip.

    Builds ``nCommits`` commits of ``nSamples`` random arrays each, pushes
    master to the test server, clones into a fresh directory, and checks that
    every commit is readable in the clone with only *remote references* (no
    actual data is transferred by ``clone`` — reading a sample raises
    ``FileNotFoundError``) and that the cloned history equals the original.
    """
    from hangar import Repository
    from hangar.records.summarize import list_history

    # ---- build nCommits commits on master, each holding nSamples arrays ----
    cmtList = []
    co = repo.checkout(write=True)
    co.add_ndarray_column(name='writtenaset', shape=(5, 7), dtype=np.float32)
    for cIdx in range(nCommits):
        if cIdx != 0:
            # a write checkout is closed after each commit, so reopen one
            co = repo.checkout(write=True)
        sampList = []
        with co.columns['writtenaset'] as d:
            # drop all but one carried-over sample so each commit differs
            for prevKey in list(d.keys())[1:]:
                del d[prevKey]
            for sIdx in range(nSamples):
                arr = np.random.randn(*array5by7.shape).astype(np.float32) * 100
                d[str(sIdx)] = arr
                sampList.append(arr)
        cmt = co.commit(f'commit number: {cIdx}')
        cmtList.append((cmt, sampList))
        co.close()
    masterHist = list_history(repo._env.refenv, repo._env.branchenv, branch_name='master')

    # ---- push master to the server ----
    repo.remote.add('origin', server_instance)
    push1 = repo.remote.push('origin', 'master')
    assert push1 == 'master'

    # ---- clone into a fresh directory; records only, no sample data ----
    new_tmpdir = pjoin(managed_tmpdir, 'new')
    mkdir(new_tmpdir)
    newRepo = Repository(path=new_tmpdir, exists=False)
    newRepo.clone('Test User', '*****@*****.**', server_instance, remove_old=True)
    assert newRepo.list_branches() == ['master', 'origin/master']
    for cmt, sampList in cmtList:
        # checking out a commit with unfetched data warns the user
        with pytest.warns(UserWarning):
            nco = newRepo.checkout(commit=cmt)
        assert len(nco.columns) == 1
        assert 'writtenaset' in nco.columns
        assert len(nco.columns['writtenaset']) == len(sampList)
        assert nco.columns['writtenaset'].contains_remote_references is True
        remoteKeys = nco.columns['writtenaset'].remote_reference_keys
        assert tuple([str(idx) for idx in range(len(sampList))]) == remoteKeys
        for idx, _ in enumerate(sampList):
            sIdx = str(idx)
            # the key is known to the clone, but its data is not on disk
            assert sIdx in nco.columns['writtenaset']
            with pytest.raises(FileNotFoundError):
                shouldNotExist = nco.columns['writtenaset'][sIdx]
        nco.close()
    # cloned master history must be identical to the pushed one
    cloneMasterHist = list_history(newRepo._env.refenv, newRepo._env.branchenv, branch_name='master')
    assert cloneMasterHist == masterHist
    newRepo._env._close_environments()
def test_server_fetch_data_sample_not_valid_type(
        self, two_multi_format_repo_class, managed_tmpdir_class, tmp_path_factory
):
    # Malformed sample specifiers passed to fetch_data_sample must be
    # rejected client-side before any network transfer is attempted.
    from hangar import Repository

    _, server_instance = two_multi_format_repo_class

    # Clone test (master branch)
    clone_dir = str(tmp_path_factory.mktemp('newclone', numbered=True))
    cloned = Repository(path=clone_dir, exists=False)
    cloned.clone('Test User', '*****@*****.**', server_instance, remove_old=True)

    # bytes are not an accepted sample-key type
    with pytest.raises(TypeError):
        cloned.remote.fetch_data_sample(
            remote='origin', branch='master',
            column='array_flat', samples=[b'BYTES_TYPE_NOT_VALID'])

    # a nested specifier tuple longer than (sample, subsample) is invalid
    with pytest.raises(ValueError, match='nested column specifier sequence'):
        cloned.remote.fetch_data_sample(
            remote='origin', branch='master',
            column='array_nested', samples=[(0, 1, 'ARRAY_NOT_VALID')])

    cloned._env._close_environments()
def test_push_clone_three_way_merge(server_instance, repo_2_br_no_conf, managed_tmpdir):
    """A three-way merge pushed to the server must round-trip through a clone."""
    from hangar import Repository

    origin_repo = repo_2_br_no_conf
    origin_repo.remote.add('origin', server_instance)
    assert origin_repo.remote.push('origin', 'master') == 'master'
    assert origin_repo.remote.push('origin', 'testbranch') == 'testbranch'

    test_head = origin_repo.log(branch='testbranch', return_contents=True)['head']
    master_head = origin_repo.log(branch='master', return_contents=True)['head']

    # merge testbranch into master and push the resulting merge commit
    merge_cmt = origin_repo.merge('merge commit', 'master', 'testbranch')
    merge_head = origin_repo.log(branch='master', return_contents=True)['head']
    merge_order = origin_repo.log(branch='master', return_contents=True)['order']
    assert origin_repo.remote.push('origin', 'master') == 'master'
    # the merge created a brand new head commit
    assert merge_head != master_head
    assert merge_head != test_head

    # clone into a fresh directory and compare history with the origin
    clone_pth = pjoin(managed_tmpdir, 'new')
    mkdir(clone_pth)
    cloned = Repository(path=clone_pth, exists=False)
    cloned.clone('Test User', '*****@*****.**', server_instance, remove_old=True)
    clone_head = cloned.log(branch='master', return_contents=True)['head']
    clone_order = cloned.log(branch='master', return_contents=True)['order']
    assert clone_head == merge_head == merge_cmt
    assert merge_order == clone_order
    cloned._env._close_environments()
def test_server_fetch_data_sample_not_existing_fails(
        self, two_multi_format_repo_class, managed_tmpdir_class, tmp_path_factory
):
    # Requesting samples (flat keys, or either element of a nested key)
    # which do not exist on the server must raise KeyError.
    from hangar import Repository

    _, server_instance = two_multi_format_repo_class

    # Clone test (master branch)
    clone_dir = str(tmp_path_factory.mktemp('newclone', numbered=True))
    cloned = Repository(path=clone_dir, exists=False)
    cloned.clone('Test User', '*****@*****.**', server_instance, remove_old=True)

    missing_requests = (
        ('array_flat', ['DOESNOTEXIST']),       # unknown flat sample key
        ('array_nested', [(1, 'DOESNOTEXIST')]),  # unknown subsample key
        ('array_nested', [('DOESNOTEXIST', 0)]),  # unknown sample key
    )
    for column, samples in missing_requests:
        with pytest.raises(KeyError):
            cloned.remote.fetch_data_sample(
                remote='origin', branch='master', column=column, samples=samples)

    cloned._env._close_environments()
def test_push_clone_digests_exceeding_server_nbyte_limit(
        mocker, server_instance_nbytes_limit, repo, managed_tmpdir):
    """Verify push/fetch chunking when payloads exceed the server nbyte limit.

    Pushes 4 commits x 70 (50x50 float32) samples against a server configured
    with a reduced max-nbytes limit, spying on the chunking machinery to check
    that transfers were split (exact call counts of 6 / 8 were presumably
    observed during test development for this fixture configuration — NOTE
    (review): they are tied to the fixture's byte limit).  Then clones and
    fetches every commit, verifying all sample data arrives intact.
    """
    from hangar import Repository
    from hangar.remote import chunks, client

    # Push master branch test
    masterCmtList = []
    co = repo.checkout(write=True)
    co.add_ndarray_column(name='aset', shape=(50, 50), dtype=np.float32)
    for cIdx in range(4):
        if cIdx != 0:
            # write checkout was closed after the previous commit
            co = repo.checkout(write=True)
        masterSampList = []
        with co.columns['aset'] as d:
            # drop all but one carried-over sample so each commit differs
            for prevKey in list(d.keys())[1:]:
                del d[prevKey]
            for sIdx in range(70):
                arr = np.random.randint(0, 255, size=(50, 50)).astype(np.float32)
                d[str(sIdx)] = arr
                masterSampList.append(arr)
        cmt = co.commit(f'master commit number: {cIdx}')
        masterCmtList.append((cmt, masterSampList))
        co.close()

    repo.remote.add('origin', server_instance_nbytes_limit)
    # spy on the chunk iterator to observe how the push payload was split
    spy = mocker.spy(chunks, 'tensorChunkedIterator')
    push1 = repo.remote.push('origin', 'master')
    assert chunks.tensorChunkedIterator.call_count == 6
    for call in spy.call_args_list:
        assert call[1]['uncomp_nbytes'] <= 550_000  # maximum amount over 100_000 observed in test development
    assert push1 == 'master'

    # Clone test (master branch)
    new_tmpdir = pjoin(managed_tmpdir, 'new')
    mkdir(new_tmpdir)
    newRepo = Repository(path=new_tmpdir, exists=False)
    newRepo.clone('Test User', '*****@*****.**', server_instance_nbytes_limit, remove_old=True)
    assert newRepo.list_branches() == ['master', 'origin/master']
    # spy on the client fetch path to observe how many data requests were made
    spy = mocker.spy(client.HangarClient, 'fetch_data')
    for cmt, sampList in masterCmtList:
        newRepo.remote.fetch_data('origin', commit=cmt)
        nco = newRepo.checkout(commit=cmt)
        assert len(nco.columns) == 1
        assert 'aset' in nco.columns
        assert len(nco.columns['aset']) == 70
        for sIdx, samp in enumerate(sampList):
            assert np.allclose(nco.columns['aset'][str(sIdx)], samp)
        nco.close()
        del nco
    assert client.HangarClient.fetch_data.call_count == 8
    newRepo._env._close_environments()
def clone(repo: Repository, remote, name, email, overwrite):
    """Initialize a repository at the current path and fetch updated records from REMOTE.

    Note: This method does not actually download the data to disk. Please look
    into the ``fetch-data`` command.
    """
    # Only proceed when there is no repo yet, or the caller asked to overwrite.
    if (not repo.initialized) or overwrite:
        repo.clone(name, email, remote, remove_old=overwrite)
    else:
        click.echo(f'Repo already exists at: {repo.path}')
def clone(remote, uname, email, overwrite):
    # A multi-word user name may arrive as a sequence of tokens; rejoin it.
    if isinstance(uname, (list, tuple)):
        uname = ' '.join(uname)
    cwd = os.getcwd()
    repo = Repository(path=cwd)
    repo.clone(user_name=uname, user_email=email,
               remote_address=remote, remove_old=overwrite)
    click.echo(f'Hangar repository initialized at {cwd}')
def test_local_without_data_fails_no_common_no_local(self, written_two_cmt_server_repo, managed_tmpdir):
    # Clone transfers records only — no sample data exists locally, so
    # constructing a torch dataset over the column must fail.
    clone_pth = pjoin(managed_tmpdir, 'new')
    mkdir(clone_pth)
    server, _ = written_two_cmt_server_repo
    repo = Repository(path=clone_pth, exists=False)
    repo.clone('name', '[email protected]', server, remove_old=True)
    co = repo.checkout()
    aset = co.arraysets['writtenaset']
    with pytest.raises(ValueError):
        make_torch_dataset(aset)
    co.close()
    repo._env._close_environments()
def test_local_without_data_fails_data_unavailable(self, written_two_cmt_server_repo, managed_tmpdir):
    # Clone transfers records only — requesting specific unfetched keys for a
    # tensorflow dataset must fail because the data files are not on disk.
    clone_pth = pjoin(managed_tmpdir, 'new')
    mkdir(clone_pth)
    server, _ = written_two_cmt_server_repo
    repo = Repository(path=clone_pth, exists=False)
    repo.clone('name', '[email protected]', server, remove_old=True)
    co = repo.checkout()
    aset = co.arraysets['writtenaset']
    with pytest.raises(FileNotFoundError):
        make_tf_dataset(aset, keys=['1', '2'])
    co.close()
    repo._env._close_environments()
def clone(ctx, remote, name, email, overwrite):
    """Initialize a repository at the current path and fetch updated records from REMOTE.

    Note: This method does not actually download the data to disk. Please look
    into the ``fetch-data`` command.
    """
    cwd = os.getcwd()
    new_repo = Repository(path=cwd, exists=False)
    new_repo.clone(user_name=name, user_email=email,
                   remote_address=remote, remove_old=overwrite)
    click.echo(f'Hangar repository initialized at {cwd}')
def test_push_clone_digests_exceeding_server_nbyte_limit(
        server_instance, repo, managed_tmpdir):
    """Push and fetch data whose total size exceeds the configured transfer limit.

    Lowers the server fetch and client push nbyte limits to 100_000 so the
    4 commits x 70 (50x20 float32) samples must be moved in multiple
    round-trips, then clones and fetches every commit verifying all data
    arrives intact.  Uses the older ``arraysets`` API.
    """
    from hangar.remote import config
    from hangar import Repository

    # shrink both transfer limits so payloads must be split across messages
    config.config['server']['grpc']['fetch_max_nbytes'] = 100_000
    config.config['client']['grpc']['push_max_nbytes'] = 100_000

    # Push master branch test
    masterCmtList = []
    co = repo.checkout(write=True)
    co.arraysets.init_arrayset(name='aset', shape=(50, 20), dtype=np.float32)
    for cIdx in range(4):
        if cIdx != 0:
            # write checkout was closed after the previous commit
            co = repo.checkout(write=True)
        masterSampList = []
        with co.arraysets['aset'] as d:
            # drop all but one carried-over sample so each commit differs
            for prevKey in list(d.keys())[1:]:
                d.remove(prevKey)
            for sIdx in range(70):
                arr = np.random.randn(50, 20).astype(np.float32)
                d[str(sIdx)] = arr
                masterSampList.append(arr)
        cmt = co.commit(f'master commit number: {cIdx}')
        masterCmtList.append((cmt, masterSampList))
        co.close()
    repo.remote.add('origin', server_instance)
    push1 = repo.remote.push('origin', 'master')
    assert push1 == 'master'

    # Clone test (master branch)
    new_tmpdir = pjoin(managed_tmpdir, 'new')
    mkdir(new_tmpdir)
    newRepo = Repository(path=new_tmpdir, exists=False)
    newRepo.clone('Test User', '*****@*****.**', server_instance, remove_old=True)
    assert newRepo.list_branches() == ['master', 'origin/master']
    for cmt, sampList in masterCmtList:
        newRepo.remote.fetch_data('origin', commit=cmt)
        nco = newRepo.checkout(commit=cmt)
        assert len(nco.arraysets) == 1
        assert 'aset' in nco.arraysets
        assert len(nco.arraysets['aset']) == 70
        for sIdx, samp in enumerate(sampList):
            assert np.allclose(nco.arraysets['aset'][str(sIdx)], samp)
        nco.close()
    newRepo._env._close_environments()
def test_push_restricted_with_right_username_password( server_instance_push_restricted, repo, managed_tmpdir): from hangar import Repository # Push master branch test masterCmtList = [] co = repo.checkout(write=True) co.add_ndarray_column(name='aset', shape=(50, 20), dtype=np.float32) for cIdx in range(1): if cIdx != 0: co = repo.checkout(write=True) masterSampList = [] with co.columns['aset'] as d: for prevKey in list(d.keys())[1:]: del d[prevKey] for sIdx in range(70): arr = np.random.randn(50, 20).astype(np.float32) d[str(sIdx)] = arr masterSampList.append(arr) cmt = co.commit(f'master commit number: {cIdx}') masterCmtList.append((cmt, masterSampList)) co.close() repo.remote.add('origin', server_instance_push_restricted) push1 = repo.remote.push('origin', 'master', username='******', password='******') assert push1 == 'master' # Clone test (master branch) new_tmpdir = pjoin(managed_tmpdir, 'new') mkdir(new_tmpdir) newRepo = Repository(path=new_tmpdir, exists=False) newRepo.clone('Test User', '*****@*****.**', server_instance_push_restricted, remove_old=True) assert newRepo.list_branches() == ['master', 'origin/master'] for cmt, sampList in masterCmtList: newRepo.remote.fetch_data('origin', commit=cmt) nco = newRepo.checkout(commit=cmt) assert len(nco.columns) == 1 assert 'aset' in nco.columns assert len(nco.columns['aset']) == 70 for sIdx, samp in enumerate(sampList): assert np.allclose(nco.columns['aset'][str(sIdx)], samp) nco.close() newRepo._env._close_environments()
def test_server_fetch_data_sample(
        self, two_multi_format_repo_class, managed_tmpdir_class,
        fetchOp, column_name, keys, tmp_path_factory
):
    """Fetch individual samples from a clone, addressed by branch or commit.

    Parametrized over ``fetchOp`` ('branch' | 'commit'), column name, and key
    specifiers (flat keys, (sample,) or (sample, subsample) tuples for nested
    columns).  Verifies ``fetch_data_sample`` resolves to the expected commit
    and that every requested sample is readable afterwards.
    """
    from hangar import Repository

    cmt, server_instance = two_multi_format_repo_class

    # Clone test (master branch)
    _new_tmpdir = tmp_path_factory.mktemp('newclone', numbered=True)
    new_tmpdir = str(_new_tmpdir)
    newRepo = Repository(path=new_tmpdir, exists=False)
    newRepo.clone('Test User', '*****@*****.**', server_instance, remove_old=True)

    # ------------------ format arguments depending on options -----------------

    kwargs = {
        'column': column_name,
        'samples': keys
    }
    if fetchOp == 'branch':
        kwargs['branch'] = 'master'
    elif fetchOp == 'commit':
        kwargs['commit'] = cmt
    else:
        raise ValueError(f'fetchOp unknown: {fetchOp}')

    fetch_commit = newRepo.remote.fetch_data_sample(remote='origin', **kwargs)
    # branch head and the fixture commit are the same commit here
    assert fetch_commit == cmt

    co = newRepo.checkout()
    try:
        col = co[column_name]
        if isinstance(keys, (list, tuple)):
            if column_name.endswith('flat'):
                # flat columns: each key maps directly to a sample
                for key in keys:
                    assert col[key] is not None
            else:
                # nested columns: keys may be bare samples or
                # (sample, subsample) / (sample,) tuples
                for sample in keys:
                    if isinstance(sample, (list, tuple)):
                        if len(sample) == 2:
                            assert col[sample[0]][sample[1]] is not None
                        elif len(sample) == 1:
                            assert col[sample[0]][...] is not None
                    else:
                        assert col[sample][...] is not None
    finally:
        co.close()
    newRepo._env._close_environments()
def test_server_fetch_data_sample_branch_and_commit_args_passed_fails(
        self, two_multi_format_repo_class, managed_tmpdir_class, tmp_path_factory
):
    # Passing both ``branch`` and ``commit`` is ambiguous and must raise,
    # even when each value would be valid on its own.
    from hangar import Repository

    cmt, server_instance = two_multi_format_repo_class

    # Clone test (master branch)
    clone_dir = str(tmp_path_factory.mktemp('newclone', numbered=True))
    cloned = Repository(path=clone_dir, exists=False)
    cloned.clone('Test User', '*****@*****.**', server_instance, remove_old=True)

    with pytest.raises(ValueError, match='``branch`` and ``commit``'):
        cloned.remote.fetch_data_sample(
            remote='origin',
            branch='master',  # actual value which might otherwise work
            commit=cmt,       # actual value which might otherwise work
            column='array_flat',
            samples=[0, 1])

    cloned._env._close_environments()
def test_server_push_two_branch_then_clone_fetch_data_options(
        self, two_branch_multi_commit_repo_class, managed_tmpdir_class, array5by7_class,
        fetchBranch, fetchCommit, fetchAsetns, fetchNbytes, fetchAll_history, tmp_path_factory):
    """Exercise every ``fetch_data`` option combination against a clone.

    Parametrized over branch-vs-commit addressing, a column-name subset,
    a ``max_num_bytes`` cap, and ``retrieve_all_history``.  Setup (two pushed
    branches with per-commit sample lists) comes from the class fixture.
    Checks: full-history + byte-cap together is rejected; otherwise every
    fetched commit's data matches what was written, and when a byte cap is
    given the total bytes materialized never exceed it (unfetched samples
    raise ``FileNotFoundError`` and are skipped).
    """
    from hangar import Repository
    from operator import eq

    branch, branchHist, devCmts, masterHist, server_instance = two_branch_multi_commit_repo_class

    # Clone test (master branch)
    _new_tmpdir = tmp_path_factory.mktemp('newclone', numbered=True)
    new_tmpdir = str(_new_tmpdir)
    newRepo = Repository(path=new_tmpdir, exists=False)
    newRepo.clone('Test User', '*****@*****.**', server_instance, remove_old=True)
    newRepo.remote.fetch('origin', branch=branch.name)
    newRepo.create_branch('testbranch', base_commit=branchHist['head'])
    assert newRepo.list_branches() == ['master', 'origin/master', f'origin/{branch.name}', branch.name]

    # ------------------ format arguments depending on options -----------------

    kwargs = {
        'column_names': fetchAsetns,
        'max_num_bytes': fetchNbytes,
        'retrieve_all_history': fetchAll_history,
    }
    if fetchBranch is not None:
        func = branchHist if fetchBranch == 'testbranch' else masterHist
        kwargs['branch'] = fetchBranch
        kwargs['commit'] = None
    else:
        func = branchHist if fetchBranch == 'br' else masterHist
        kwargs['branch'] = None
        kwargs['commit'] = func['head']

    if fetchAll_history is True:
        commits_to_check = func['order']
    else:
        commits_to_check = [func['head']]

    # ----------------------- retrieve data with desired options --------------

    # This case should fail: full history with a byte cap is contradictory
    if (fetchAll_history is True) and isinstance(fetchNbytes, int):
        try:
            with pytest.raises(ValueError):
                fetch_commits = newRepo.remote.fetch_data(remote='origin', **kwargs)
        finally:
            newRepo._env._close_environments()
        return True
    # get data
    fetch_commits = newRepo.remote.fetch_data(remote='origin', **kwargs)
    assert commits_to_check == fetch_commits

    # ------------- check that you got everything you expected ----------------

    for fCmt in fetch_commits:
        co = newRepo.checkout(commit=fCmt)
        assert co.commit_hash == fCmt

        # when we are checking one aset only
        if isinstance(fetchAsetns, tuple):
            d = co.columns[fetchAsetns[0]]
            # ensure we didn't fetch the other data simultaneously
            ds1SampList, ds2SampList, ds3SampList, ds4SampList = devCmts[fCmt]
            if fetchAsetns[0] == 'writtenaset':
                compare = ds1SampList
                cmp_func = np.allclose
            elif fetchAsetns[0] == '_two':
                compare = ds2SampList
                cmp_func = np.allclose
            elif fetchAsetns[0] == 'str_col':
                compare = ds3SampList
                cmp_func = eq
            else:
                compare = ds4SampList
                cmp_func = eq

            totalSeen = 0
            for idx, samp in enumerate(compare):
                if fetchNbytes is None:
                    assert cmp_func(samp, d[str(idx)])
                else:
                    # with a byte cap some samples were never fetched;
                    # those raise FileNotFoundError and are skipped
                    try:
                        arr = d[str(idx)]
                        assert cmp_func(samp, arr)
                        try:
                            totalSeen += arr.nbytes
                        except AttributeError:
                            # str/bytes values have no .nbytes
                            totalSeen += len(arr)
                    except FileNotFoundError:
                        pass
                    assert totalSeen <= fetchNbytes

        # compare both asets at the same time
        else:
            d = co.columns['writtenaset']
            dd = co.columns['_two']
            str_col = co.columns['str_col']
            bytes_col = co.columns['bytes_col']
            ds1List, ds2List, ds3List, ds4List = devCmts[fCmt]

            totalSeen = 0
            for idx, ds1ds2ds3ds4 in enumerate(zip(ds1List, ds2List, ds3List, ds4List)):
                ds1, ds2, ds3, ds4 = ds1ds2ds3ds4
                if fetchNbytes is None:
                    assert np.allclose(ds1, d[str(idx)])
                    assert np.allclose(ds2, dd[str(idx)])
                    assert ds3 == str_col[str(idx)]
                    assert ds4 == bytes_col[str(idx)]
                else:
                    # unfetched samples (over the byte cap) are skipped
                    try:
                        arr1 = d[str(idx)]
                        assert np.allclose(ds1, arr1)
                        totalSeen += arr1.nbytes
                    except FileNotFoundError:
                        pass
                    try:
                        arr2 = dd[str(idx)]
                        assert np.allclose(ds2, arr2)
                        totalSeen += arr2.nbytes
                    except FileNotFoundError:
                        pass
                    try:
                        sval = str_col[str(idx)]
                        assert ds3 == sval
                        totalSeen += len(sval.encode())
                    except FileNotFoundError:
                        pass
                    try:
                        bval = bytes_col[str(idx)]
                        assert ds4 == bval
                        totalSeen += len(bval)
                    except FileNotFoundError:
                        pass
                    assert totalSeen <= fetchNbytes
        co.close()
    newRepo._env._close_environments()
def test_server_push_two_branch_then_clone_fetch_data_options(
        server_instance, repo, managed_tmpdir, array5by7, nMasterCommits,
        nMasterSamples, nDevCommits, nDevSamples, fetchBranch, fetchCommit,
        fetchAsetns, fetchNbytes, fetchAll_history):
    """Exercise ``fetch_data`` option combinations (older ``arraysets`` API).

    Pushes a master branch and a dev branch each holding two arraysets, then
    clones and fetches data with every combination of branch/commit
    addressing, arrayset-name subset, byte cap, and full-history retrieval.
    Full-history together with a byte cap must raise; otherwise fetched data
    is compared against the written samples, skipping samples kept remote by
    the byte cap (their reads raise ``FileNotFoundError``).
    """
    from hangar import Repository
    from hangar.records.summarize import list_history

    # Push master branch test
    masterCmts = {}
    co = repo.checkout(write=True)
    co.arraysets.init_arrayset(name='writtenaset', shape=(5, 7), dtype=np.float32)
    co.arraysets.init_arrayset(name='_two', shape=(20), dtype=np.float32)
    for cIdx in range(nMasterCommits):
        if cIdx != 0:
            # write checkout was closed after the previous commit
            co = repo.checkout(write=True)
        masterSampList1 = []
        masterSampList2 = []
        with co.arraysets['writtenaset'] as d, co.arraysets['_two'] as dd:
            # drop all but one carried-over sample so each commit differs
            for prevKey in list(d.keys())[1:]:
                d.remove(prevKey)
                dd.remove(prevKey)
            for sIdx in range(nMasterSamples):
                arr1 = np.random.randn(*array5by7.shape).astype(np.float32) * 100
                d[str(sIdx)] = arr1
                masterSampList1.append(arr1)
                arr2 = np.random.randn(20).astype(np.float32)
                dd[str(sIdx)] = arr2
                masterSampList2.append(arr2)
        cmt = co.commit(f'master commit number: {cIdx}')
        masterCmts[cmt] = (masterSampList1, masterSampList2)
        co.close()

    repo.remote.add('origin', server_instance)
    push1 = repo.remote.push('origin', 'master')
    assert push1 == 'master'
    masterHist = list_history(repo._env.refenv, repo._env.branchenv, branch_name='master')

    # Push dev branch test
    devCmts = masterCmts.copy()
    branch = repo.create_branch('testbranch')
    for cIdx in range(nDevCommits):
        co = repo.checkout(write=True, branch=branch.name)
        devSampList1 = []
        devSampList2 = []
        with co.arraysets['writtenaset'] as d, co.arraysets['_two'] as dd:
            for prevKey in list(d.keys())[1:]:
                d.remove(prevKey)
                dd.remove(prevKey)
            for sIdx in range(nDevSamples):
                arr1 = np.random.randn(*array5by7.shape).astype(np.float32) * 100
                d[str(sIdx)] = arr1
                devSampList1.append(arr1)
                arr2 = np.random.randn(20).astype(np.float32)
                dd[str(sIdx)] = arr2
                devSampList2.append(arr2)
        cmt = co.commit(f'dev commit number: {cIdx}')
        devCmts[cmt] = (devSampList1, devSampList2)
        co.close()
    push2 = repo.remote.push('origin', branch.name)
    assert push2 == branch.name
    branchHist = list_history(repo._env.refenv, repo._env.branchenv, branch_name=branch.name)

    # -------------------------- end setup ------------------------------------

    # Clone test (master branch)
    new_tmpdir = pjoin(managed_tmpdir, 'new')
    mkdir(new_tmpdir)
    newRepo = Repository(path=new_tmpdir, exists=False)
    newRepo.clone('Test User', '*****@*****.**', server_instance, remove_old=True)
    newRepo.remote.fetch('origin', branch=branch.name)
    newRepo.create_branch('testbranch', base_commit=branchHist['head'])
    assert newRepo.list_branches() == [
        'master', 'origin/master', f'origin/{branch.name}', branch.name
    ]

    # ------------------ format arguments depending on options -----------------

    kwargs = {
        'arrayset_names': fetchAsetns,
        'max_num_bytes': fetchNbytes,
        'retrieve_all_history': fetchAll_history,
    }
    if fetchBranch is not None:
        func = branchHist if fetchBranch == 'testbranch' else masterHist
        kwargs['branch'] = fetchBranch
        kwargs['commit'] = None
    else:
        func = branchHist if fetchBranch == 'br' else masterHist
        kwargs['branch'] = None
        kwargs['commit'] = func['head']

    if fetchAll_history is True:
        commits_to_check = func['order']
    else:
        commits_to_check = [func['head']]

    # ----------------------- retrieve data with desired options --------------

    # This case should fail: full history with a byte cap is contradictory
    if (fetchAll_history is True) and isinstance(fetchNbytes, int):
        try:
            with pytest.raises(ValueError):
                fetch_commits = newRepo.remote.fetch_data(remote='origin', **kwargs)
        finally:
            newRepo._env._close_environments()
        return True
    # get data
    fetch_commits = newRepo.remote.fetch_data(remote='origin', **kwargs)
    assert commits_to_check == fetch_commits

    # ------------- check that you got everything you expected ----------------

    for fCmt in fetch_commits:
        co = newRepo.checkout(commit=fCmt)
        assert co.commit_hash == fCmt

        # when we are checking one aset only
        if isinstance(fetchAsetns, tuple):
            d = co.arraysets[fetchAsetns[0]]
            # ensure we didn't fetch the other data simultaneously
            ds1SampList, ds2SampList = devCmts[fCmt]
            if fetchAsetns[0] == 'writtenaset':
                compare = ds1SampList
            else:
                compare = ds2SampList

            totalSeen = 0
            for idx, samp in enumerate(compare):
                if fetchNbytes is None:
                    assert np.allclose(samp, d[str(idx)])
                else:
                    # samples kept remote by the byte cap raise and are skipped
                    try:
                        arr = d[str(idx)]
                        assert np.allclose(samp, arr)
                        totalSeen += arr.nbytes
                    except FileNotFoundError:
                        pass
                    assert totalSeen <= fetchNbytes

        # compare both asets at the same time
        else:
            d = co.arraysets['writtenaset']
            dd = co.arraysets['_two']
            ds1List, ds2List = devCmts[fCmt]

            totalSeen = 0
            for idx, ds1ds2 in enumerate(zip(ds1List, ds2List)):
                ds1, ds2 = ds1ds2
                if fetchNbytes is None:
                    assert np.allclose(ds1, d[str(idx)])
                    assert np.allclose(ds2, dd[str(idx)])
                else:
                    # samples kept remote by the byte cap raise and are skipped
                    try:
                        arr1 = d[str(idx)]
                        assert np.allclose(ds1, arr1)
                        totalSeen += arr1.nbytes
                    except FileNotFoundError:
                        pass
                    try:
                        arr2 = dd[str(idx)]
                        assert np.allclose(ds2, arr2)
                        totalSeen += arr2.nbytes
                    except FileNotFoundError:
                        pass
                    assert totalSeen <= fetchNbytes
        co.close()
    newRepo._env._close_environments()
def test_server_push_second_branch_with_new_commit_then_clone_partial_fetch(
        server_instance, repo, managed_tmpdir, array5by7, nMasterCommits,
        nMasterSamples, nDevCommits, nDevSamples):
    """Push master and a dev branch, clone, then fetch dev refs without data.

    After cloning (which transfers records only), every master commit should
    expose remote-reference keys whose data reads raise ``FileNotFoundError``.
    A subsequent ``fetch`` of the dev branch makes its refs visible with the
    same partial (record-only) semantics, and both cloned histories must match
    the originals.
    """
    from hangar import Repository
    from hangar.records.summarize import list_history

    # Push master branch test
    masterCmtList = []
    co = repo.checkout(write=True)
    co.add_ndarray_column(name='writtenaset', shape=(5, 7), dtype=np.float32)
    for cIdx in range(nMasterCommits):
        if cIdx != 0:
            # write checkout was closed after the previous commit
            co = repo.checkout(write=True)
        masterSampList = []
        with co.columns['writtenaset'] as d:
            # drop all but one carried-over sample so each commit differs
            for prevKey in list(d.keys())[1:]:
                del d[prevKey]
            for sIdx in range(nMasterSamples):
                arr = np.random.randn(*array5by7.shape).astype(np.float32) * 100
                d[str(sIdx)] = arr
                masterSampList.append(arr)
        cmt = co.commit(f'master commit number: {cIdx}')
        masterCmtList.append((cmt, masterSampList))
        co.close()
    repo.remote.add('origin', server_instance)
    push1 = repo.remote.push('origin', 'master')
    assert push1 == 'master'
    masterHist = list_history(repo._env.refenv, repo._env.branchenv, branch_name='master')

    # Push dev branch test
    devCmtList = []
    branch = repo.create_branch('testbranch')
    for cIdx in range(nDevCommits):
        co = repo.checkout(write=True, branch=branch.name)
        devSampList = []
        with co.columns['writtenaset'] as d:
            for prevKey in list(d.keys())[1:]:
                del d[prevKey]
            for sIdx in range(nDevSamples):
                arr = np.random.randn(*array5by7.shape).astype(np.float32) * 100
                d[str(sIdx)] = arr
                devSampList.append(arr)
        cmt = co.commit(f'dev commit number: {cIdx}')
        devCmtList.append((cmt, devSampList))
        co.close()
    push2 = repo.remote.push('origin', branch.name)
    assert push2 == branch.name
    branchHist = list_history(repo._env.refenv, repo._env.branchenv, branch_name=branch.name)

    # Clone test (master branch)
    new_tmpdir = pjoin(managed_tmpdir, 'new')
    mkdir(new_tmpdir)
    newRepo = Repository(path=new_tmpdir, exists=False)
    newRepo.clone('Test User', '*****@*****.**', server_instance, remove_old=True)
    assert newRepo.list_branches() == ['master', 'origin/master']
    for cmt, sampList in masterCmtList:
        # checking out a commit with unfetched data warns the user
        with pytest.warns(UserWarning):
            nco = newRepo.checkout(commit=cmt)
        assert len(nco.columns) == 1
        assert 'writtenaset' in nco.columns
        assert len(nco.columns['writtenaset']) == nMasterSamples
        assert nco.columns['writtenaset'].contains_remote_references is True
        remoteKeys = nco.columns['writtenaset'].remote_reference_keys
        assert tuple([str(idx) for idx in range(len(sampList))]) == remoteKeys
        for idx, _ in enumerate(sampList):
            sIdx = str(idx)
            # key is known to the clone, but its data is not on disk
            assert sIdx in nco.columns['writtenaset']
            with pytest.raises(FileNotFoundError):
                shouldNotExist = nco.columns['writtenaset'][sIdx]
        nco.close()
    cloneMasterHist = list_history(newRepo._env.refenv, newRepo._env.branchenv, branch_name='master')
    assert cloneMasterHist == masterHist

    # Fetch test: bring the dev branch refs into the clone (records only)
    fetch = newRepo.remote.fetch('origin', branch=branch.name)
    assert fetch == f'origin/{branch.name}'
    assert newRepo.list_branches() == ['master', 'origin/master', f'origin/{branch.name}']
    for cmt, sampList in devCmtList:
        with pytest.warns(UserWarning):
            nco = newRepo.checkout(commit=cmt)
        assert len(nco.columns) == 1
        assert 'writtenaset' in nco.columns
        assert len(nco.columns['writtenaset']) == nDevSamples
        assert nco.columns['writtenaset'].contains_remote_references is True
        remoteKeys = nco.columns['writtenaset'].remote_reference_keys
        assert tuple([str(idx) for idx in range(len(sampList))]) == remoteKeys
        for idx, _ in enumerate(sampList):
            sIdx = str(idx)
            assert sIdx in nco.columns['writtenaset']
            with pytest.raises(FileNotFoundError):
                shouldNotExist = nco.columns['writtenaset'][sIdx]
        nco.close()
    cloneBranchHist = list_history(newRepo._env.refenv, newRepo._env.branchenv, branch_name=f'origin/{branch.name}')
    assert cloneBranchHist == branchHist
    newRepo._env._close_environments()