def test_push_and_clone_master_linear_history_multiple_commits(
        server_instance, repo, managed_tmpdir, array5by7, nCommits, nSamples):
    """Push a linear master history to a server, clone it fresh, and verify
    the clone's branches, remote-reference keys, and commit history match.

    The clone transfers refs only, so every sample must show up as a remote
    reference whose data access raises ``FileNotFoundError``.
    """
    from hangar import Repository
    from hangar.records.summarize import list_history

    commit_records = []
    wco = repo.checkout(write=True)
    wco.add_ndarray_column(name='writtenaset', shape=(5, 7), dtype=np.float32)
    for commit_idx in range(nCommits):
        # every commit after the first needs a fresh write checkout
        if commit_idx != 0:
            wco = repo.checkout(write=True)
        samples = []
        with wco.columns['writtenaset'] as col:
            # drop all but the first sample carried over from the prior commit
            for stale_key in list(col.keys())[1:]:
                del col[stale_key]
            for sample_idx in range(nSamples):
                data = np.random.randn(*array5by7.shape).astype(np.float32) * 100
                col[str(sample_idx)] = data
                samples.append(data)
        digest = wco.commit(f'commit number: {commit_idx}')
        commit_records.append((digest, samples))
        wco.close()
    expected_history = list_history(
        repo._env.refenv, repo._env.branchenv, branch_name='master')

    repo.remote.add('origin', server_instance)
    assert repo.remote.push('origin', 'master') == 'master'

    clone_dir = pjoin(managed_tmpdir, 'new')
    mkdir(clone_dir)
    clone_repo = Repository(path=clone_dir, exists=False)
    clone_repo.clone('Test User', '*****@*****.**', server_instance, remove_old=True)
    assert clone_repo.list_branches() == ['master', 'origin/master']

    for digest, samples in commit_records:
        # partial clone: checking out a commit whose data is unfetched warns
        with pytest.warns(UserWarning):
            rco = clone_repo.checkout(commit=digest)
        assert len(rco.columns) == 1
        assert 'writtenaset' in rco.columns
        assert len(rco.columns['writtenaset']) == len(samples)
        assert rco.columns['writtenaset'].contains_remote_references is True
        ref_keys = rco.columns['writtenaset'].remote_reference_keys
        assert tuple(map(str, range(len(samples)))) == ref_keys
        for sample_idx in range(len(samples)):
            key = str(sample_idx)
            assert key in rco.columns['writtenaset']
            # the sample data itself was never transferred, so access fails
            with pytest.raises(FileNotFoundError):
                _ = rco.columns['writtenaset'][key]
        rco.close()

    clone_history = list_history(
        clone_repo._env.refenv, clone_repo._env.branchenv, branch_name='master')
    assert clone_history == expected_history
    clone_repo._env._close_environments()
def two_branch_multi_commit_repo_class(server_instance_class, classrepo, array5by7_class):
    """Class-scoped fixture body: build and push a two-branch repository.

    Creates four columns (two ndarray, one str, one bytes), writes
    ``nMasterCommits`` commits on ``master`` and ``nDevCommits`` commits on a
    ``testbranch`` branch, pushes both branches to ``server_instance_class``,
    then yields ``(branch, branchHist, devCmts, masterHist,
    server_instance_class)`` for dependent tests to consume.
    """
    from hangar.records.summarize import list_history
    nMasterCommits = 2
    nMasterSamples = 10
    nDevCommits = 1
    nDevSamples = 16

    # Push master branch test
    masterCmts = {}  # commit digest -> tuple of the four per-column sample lists
    co = classrepo.checkout(write=True)
    co.add_ndarray_column(name='writtenaset', shape=(5, 7), dtype=np.float32)
    # NOTE(review): ``shape=(20)`` is the int 20, not a 1-tuple — presumably
    # hangar normalizes integer shapes to 1-d; confirm against the column API.
    co.add_ndarray_column(name='_two', shape=(20), dtype=np.float32)
    co.add_str_column('str_col')
    co.add_bytes_column('bytes_col')
    for cIdx in range(nMasterCommits):
        # every commit after the first needs a fresh write checkout
        if cIdx != 0:
            co = classrepo.checkout(write=True)
        masterSampList1 = []
        masterSampList2 = []
        masterSampList3 = []
        masterSampList4 = []
        with co.columns['writtenaset'] as d,\
             co.columns['_two'] as dd,\
             co.columns['str_col'] as scol, \
             co.columns['bytes_col'] as bcol:
            # drop all but the first sample carried over from the prior commit
            for prevKey in list(d.keys())[1:]:
                del d[prevKey]
                del dd[prevKey]
                del scol[prevKey]
                del bcol[prevKey]
            for sIdx in range(nMasterSamples):
                arr1 = np.random.randn(*array5by7_class.shape).astype(np.float32) * 100
                d[str(sIdx)] = arr1
                masterSampList1.append(arr1)
                arr2 = np.random.randn(20).astype(np.float32)
                dd[str(sIdx)] = arr2
                masterSampList2.append(arr2)
                sval = f'strval master {cIdx} {sIdx}'
                scol[str(sIdx)] = sval
                masterSampList3.append(sval)
                bval = f'bytesval master {cIdx} {sIdx}'.encode()
                bcol[str(sIdx)] = bval
                masterSampList4.append(bval)
        cmt = co.commit(f'master commit number: {cIdx}')
        masterCmts[cmt] = (masterSampList1, masterSampList2, masterSampList3, masterSampList4)
        co.close()
    classrepo.remote.add('origin', server_instance_class)
    push1 = classrepo.remote.push('origin', 'master')
    assert push1 == 'master'
    masterHist = list_history(classrepo._env.refenv, classrepo._env.branchenv, branch_name='master')

    # Push dev branch test
    devCmts = masterCmts.copy()  # dev history also contains the master commits
    branch = classrepo.create_branch('testbranch')
    for cIdx in range(nDevCommits):
        co = classrepo.checkout(write=True, branch=branch.name)
        devSampList1 = []
        devSampList2 = []
        devSampList3 = []
        devSampList4 = []
        with co.columns['writtenaset'] as d,\
             co.columns['_two'] as dd,\
             co.columns['str_col'] as scol, \
             co.columns['bytes_col'] as bcol:
            for prevKey in list(d.keys())[1:]:
                del d[prevKey]
                del dd[prevKey]
                del scol[prevKey]
                del bcol[prevKey]
            for sIdx in range(nDevSamples):
                arr1 = np.random.randn(*array5by7_class.shape).astype(np.float32) * 100
                d[str(sIdx)] = arr1
                devSampList1.append(arr1)
                arr2 = np.random.randn(20).astype(np.float32)
                dd[str(sIdx)] = arr2
                devSampList2.append(arr2)
                sval = f'strval dev {cIdx} {sIdx}'
                scol[str(sIdx)] = sval
                devSampList3.append(sval)
                bval = f'bytesval dev {cIdx} {sIdx}'.encode()
                bcol[str(sIdx)] = bval
                devSampList4.append(bval)
        cmt = co.commit(f'dev commit number: {cIdx}')
        devCmts[cmt] = (devSampList1, devSampList2, devSampList3, devSampList4)
        co.close()
    push2 = classrepo.remote.push('origin', branch.name)
    assert push2 == branch.name
    branchHist = list_history(classrepo._env.refenv, classrepo._env.branchenv, branch_name=branch.name)
    # hand everything to the dependent tests; control resumes here at teardown
    yield branch, branchHist, devCmts, masterHist, server_instance_class
    pass
def test_server_push_second_branch_with_new_commit_then_clone_partial_fetch(
        server_instance, repo, managed_tmpdir, array5by7, nMasterCommits,
        nMasterSamples, nDevCommits, nDevSamples):
    """Push master plus a second branch, clone (master only), then fetch the
    second branch's refs and verify both histories resolve in the clone.

    No sample data is transferred by clone/fetch, so every key must appear as
    a remote reference whose data access raises ``FileNotFoundError``.
    """
    from hangar import Repository
    from hangar.records.summarize import list_history

    def _verify_remote_only_commits(repository, records, expected_len):
        # each commit's refs must exist locally while its data does not
        for digest, written in records:
            with pytest.warns(UserWarning):
                rco = repository.checkout(commit=digest)
            assert len(rco.columns) == 1
            assert 'writtenaset' in rco.columns
            assert len(rco.columns['writtenaset']) == expected_len
            assert rco.columns['writtenaset'].contains_remote_references is True
            ref_keys = rco.columns['writtenaset'].remote_reference_keys
            assert tuple(map(str, range(len(written)))) == ref_keys
            for sample_idx in range(len(written)):
                key = str(sample_idx)
                assert key in rco.columns['writtenaset']
                with pytest.raises(FileNotFoundError):
                    _ = rco.columns['writtenaset'][key]
            rco.close()

    # -- build & push the master branch ---------------------------------------
    master_records = []
    wco = repo.checkout(write=True)
    wco.add_ndarray_column(name='writtenaset', shape=(5, 7), dtype=np.float32)
    for commit_idx in range(nMasterCommits):
        # every commit after the first needs a fresh write checkout
        if commit_idx != 0:
            wco = repo.checkout(write=True)
        written = []
        with wco.columns['writtenaset'] as col:
            for stale_key in list(col.keys())[1:]:
                del col[stale_key]
            for sample_idx in range(nMasterSamples):
                data = np.random.randn(*array5by7.shape).astype(np.float32) * 100
                col[str(sample_idx)] = data
                written.append(data)
        digest = wco.commit(f'master commit number: {commit_idx}')
        master_records.append((digest, written))
        wco.close()
    repo.remote.add('origin', server_instance)
    assert repo.remote.push('origin', 'master') == 'master'
    master_history = list_history(
        repo._env.refenv, repo._env.branchenv, branch_name='master')

    # -- build & push the dev branch ------------------------------------------
    dev_records = []
    branch = repo.create_branch('testbranch')
    for commit_idx in range(nDevCommits):
        wco = repo.checkout(write=True, branch=branch.name)
        written = []
        with wco.columns['writtenaset'] as col:
            for stale_key in list(col.keys())[1:]:
                del col[stale_key]
            for sample_idx in range(nDevSamples):
                data = np.random.randn(*array5by7.shape).astype(np.float32) * 100
                col[str(sample_idx)] = data
                written.append(data)
        digest = wco.commit(f'dev commit number: {commit_idx}')
        dev_records.append((digest, written))
        wco.close()
    assert repo.remote.push('origin', branch.name) == branch.name
    branch_history = list_history(
        repo._env.refenv, repo._env.branchenv, branch_name=branch.name)

    # -- clone: only master comes down ----------------------------------------
    clone_dir = pjoin(managed_tmpdir, 'new')
    mkdir(clone_dir)
    clone_repo = Repository(path=clone_dir, exists=False)
    clone_repo.clone('Test User', '*****@*****.**', server_instance, remove_old=True)
    assert clone_repo.list_branches() == ['master', 'origin/master']
    _verify_remote_only_commits(clone_repo, master_records, nMasterSamples)
    clone_master_history = list_history(
        clone_repo._env.refenv, clone_repo._env.branchenv, branch_name='master')
    assert clone_master_history == master_history

    # -- fetch the dev branch refs into the clone -----------------------------
    fetched = clone_repo.remote.fetch('origin', branch=branch.name)
    assert fetched == f'origin/{branch.name}'
    assert clone_repo.list_branches() == [
        'master', 'origin/master', f'origin/{branch.name}']
    _verify_remote_only_commits(clone_repo, dev_records, nDevSamples)
    clone_branch_history = list_history(
        clone_repo._env.refenv, clone_repo._env.branchenv,
        branch_name=f'origin/{branch.name}')
    assert clone_branch_history == branch_history
    clone_repo._env._close_environments()
def test_server_push_two_branch_then_clone_fetch_data_options(
        server_instance, repo, managed_tmpdir, array5by7, nMasterCommits,
        nMasterSamples, nDevCommits, nDevSamples, fetchBranch, fetchCommit,
        fetchAsetns, fetchNbytes, fetchAll_history):
    """Push two branches, clone, then exercise ``remote.fetch_data`` across
    the full matrix of branch / commit / column / byte-limit / history options.

    Fixes relative to the previous revision:

    * Migrated from the removed ``co.arraysets`` / ``init_arrayset`` /
      ``aset.remove`` API to the ``co.columns`` / ``add_ndarray_column`` /
      ``del`` API already used by every other test in this file, and renamed
      the ``fetch_data`` kwarg ``arrayset_names`` -> ``column_names`` to
      match that API generation.
    * When fetching by commit (``fetchBranch is None``), the history was
      selected with ``fetchBranch == 'br'`` — always False in that branch, so
      the dev-branch history could never be chosen.  It now correctly
      inspects ``fetchCommit``.
    """
    from hangar import Repository
    from hangar.records.summarize import list_history

    # ----------------------- setup: push master branch -----------------------
    masterCmts = {}  # commit digest -> (writtenaset samples, _two samples)
    co = repo.checkout(write=True)
    co.add_ndarray_column(name='writtenaset', shape=(5, 7), dtype=np.float32)
    # explicit 1-d tuple; the old ``(20)`` literal was just the int 20
    co.add_ndarray_column(name='_two', shape=(20,), dtype=np.float32)
    for cIdx in range(nMasterCommits):
        # every commit after the first needs a fresh write checkout
        if cIdx != 0:
            co = repo.checkout(write=True)
        masterSampList1 = []
        masterSampList2 = []
        with co.columns['writtenaset'] as d, co.columns['_two'] as dd:
            # keep only the first sample carried over from the prior commit
            for prevKey in list(d.keys())[1:]:
                del d[prevKey]
                del dd[prevKey]
            for sIdx in range(nMasterSamples):
                arr1 = np.random.randn(*array5by7.shape).astype(np.float32) * 100
                d[str(sIdx)] = arr1
                masterSampList1.append(arr1)
                arr2 = np.random.randn(20).astype(np.float32)
                dd[str(sIdx)] = arr2
                masterSampList2.append(arr2)
        cmt = co.commit(f'master commit number: {cIdx}')
        masterCmts[cmt] = (masterSampList1, masterSampList2)
        co.close()
    repo.remote.add('origin', server_instance)
    push1 = repo.remote.push('origin', 'master')
    assert push1 == 'master'
    masterHist = list_history(
        repo._env.refenv, repo._env.branchenv, branch_name='master')

    # ------------------------ setup: push dev branch -------------------------
    devCmts = masterCmts.copy()  # dev history also contains the master commits
    branch = repo.create_branch('testbranch')
    for cIdx in range(nDevCommits):
        co = repo.checkout(write=True, branch=branch.name)
        devSampList1 = []
        devSampList2 = []
        with co.columns['writtenaset'] as d, co.columns['_two'] as dd:
            for prevKey in list(d.keys())[1:]:
                del d[prevKey]
                del dd[prevKey]
            for sIdx in range(nDevSamples):
                arr1 = np.random.randn(*array5by7.shape).astype(np.float32) * 100
                d[str(sIdx)] = arr1
                devSampList1.append(arr1)
                arr2 = np.random.randn(20).astype(np.float32)
                dd[str(sIdx)] = arr2
                devSampList2.append(arr2)
        cmt = co.commit(f'dev commit number: {cIdx}')
        devCmts[cmt] = (devSampList1, devSampList2)
        co.close()
    push2 = repo.remote.push('origin', branch.name)
    assert push2 == branch.name
    branchHist = list_history(
        repo._env.refenv, repo._env.branchenv, branch_name=branch.name)
    # -------------------------- end setup ------------------------------------

    # Clone test (master branch)
    new_tmpdir = pjoin(managed_tmpdir, 'new')
    mkdir(new_tmpdir)
    newRepo = Repository(path=new_tmpdir, exists=False)
    newRepo.clone('Test User', '*****@*****.**', server_instance, remove_old=True)
    newRepo.remote.fetch('origin', branch=branch.name)
    newRepo.create_branch('testbranch', base_commit=branchHist['head'])
    assert newRepo.list_branches() == [
        'master', 'origin/master', f'origin/{branch.name}', branch.name
    ]

    # ------------------ format arguments depending on options ----------------
    kwargs = {
        'column_names': fetchAsetns,
        'max_num_bytes': fetchNbytes,
        'retrieve_all_history': fetchAll_history,
    }
    if fetchBranch is not None:
        func = branchHist if fetchBranch == 'testbranch' else masterHist
        kwargs['branch'] = fetchBranch
        kwargs['commit'] = None
    else:
        # fetch by explicit commit digest; the ``fetchCommit`` parametrization
        # decides which history's head is requested ('br' == the dev branch)
        func = branchHist if fetchCommit == 'br' else masterHist
        kwargs['branch'] = None
        kwargs['commit'] = func['head']

    if fetchAll_history is True:
        commits_to_check = func['order']
    else:
        commits_to_check = [func['head']]

    # ----------------------- retrieve data with desired options --------------

    # a byte budget combined with full-history retrieval is invalid
    if (fetchAll_history is True) and isinstance(fetchNbytes, int):
        try:
            with pytest.raises(ValueError):
                fetch_commits = newRepo.remote.fetch_data(remote='origin', **kwargs)
        finally:
            newRepo._env._close_environments()
        return True

    # get data
    fetch_commits = newRepo.remote.fetch_data(remote='origin', **kwargs)
    assert commits_to_check == fetch_commits

    # ------------- check that you got everything you expected ----------------
    for fCmt in fetch_commits:
        co = newRepo.checkout(commit=fCmt)
        assert co.commit_hash == fCmt
        # when we are checking one column only
        if isinstance(fetchAsetns, tuple):
            d = co.columns[fetchAsetns[0]]
            # ensure we didn't fetch the other data simultaneously
            ds1SampList, ds2SampList = devCmts[fCmt]
            if fetchAsetns[0] == 'writtenaset':
                compare = ds1SampList
            else:
                compare = ds2SampList
            totalSeen = 0
            for idx, samp in enumerate(compare):
                if fetchNbytes is None:
                    assert np.allclose(samp, d[str(idx)])
                else:
                    # under a byte budget some samples may be absent locally
                    try:
                        arr = d[str(idx)]
                        assert np.allclose(samp, arr)
                        totalSeen += arr.nbytes
                    except FileNotFoundError:
                        pass
                    assert totalSeen <= fetchNbytes
        # compare both columns at the same time
        else:
            d = co.columns['writtenaset']
            dd = co.columns['_two']
            ds1List, ds2List = devCmts[fCmt]
            totalSeen = 0
            for idx, (ds1, ds2) in enumerate(zip(ds1List, ds2List)):
                if fetchNbytes is None:
                    assert np.allclose(ds1, d[str(idx)])
                    assert np.allclose(ds2, dd[str(idx)])
                else:
                    try:
                        arr1 = d[str(idx)]
                        assert np.allclose(ds1, arr1)
                        totalSeen += arr1.nbytes
                    except FileNotFoundError:
                        pass
                    try:
                        arr2 = dd[str(idx)]
                        assert np.allclose(ds2, arr2)
                        totalSeen += arr2.nbytes
                    except FileNotFoundError:
                        pass
                    assert totalSeen <= fetchNbytes
        co.close()
    newRepo._env._close_environments()