def test_initiate_dataset(path, path2):
    """Initiating a dataset from a template should configure the MD5E backend.

    Checks that exactly one record is yielded, that the crawler config file is
    actually created and loadable as a pipeline, and that files put under git
    annex -- both in the dataset and in a fresh clone of it -- end up on the
    MD5E backend.
    """
    dataset_path = opj(path, 'test')
    datas = list(
        initiate_dataset('template', 'testdataset', path=dataset_path)())
    assert_equal(len(datas), 1)
    data = datas[0]
    eq_(data['dataset_path'], dataset_path)

    crawl_cfg = opj(dataset_path, CRAWLER_META_CONFIG_PATH)
    # BUG fix: was ok_(exists, crawl_cfg), which asserted the truthiness of the
    # `exists` function itself (always true) and never checked the file.
    ok_(exists(crawl_cfg))
    pipeline = load_pipeline_from_config(crawl_cfg)

    # by default we should initiate to MD5E backend
    fname = 'test.dat'
    f = opj(dataset_path, fname)
    annex = put_file_under_git(f, content="test", annexed=True)
    eq_(annex.get_file_backend(f), 'MD5E')
    # and even if we clone it -- nope -- since persistence is set by Annexificator
    # so we don't need to explicitly to commit it just in master since that might
    # not be the branch we will end up working in
    annex2 = AnnexRepo.clone(path=path2, url=dataset_path)
    annex3 = put_file_under_git(path2, 'test2.dat', content="test2", annexed=True)
    eq_(annex3.get_file_backend('test2.dat'), 'MD5E')

    raise SkipTest("TODO much more")
def test_install_consistent_state(src, dest, dest2, dest3):
    """Installed (sub)datasets should end up in the state recorded by the super.

    If we install a dataset whose sub-dataset "went ahead" in its branch while
    the super-dataset was not yet updated (e.g. we installed super before),
    then it is desired to get that default installed branch to the position
    the super-dataset was pointing to.  It is indeed a mere heuristic which
    might not hold the assumption in some cases, but it would work for most
    simple and thus mostly used ones.
    """
    ds1 = create(src)
    sub1 = ds1.create('sub1')

    def check_consistent_installation(ds):
        # The super plus every installed subdataset must be on "master" and
        # clean, i.e. the sub sits at the exact version the super points to.
        datasets = [ds] + list(
            map(Dataset,
                ds.subdatasets(recursive=True, fulfilled=True,
                               result_xfm='paths')))
        assert len(datasets) == 2  # in this test
        # loop variable renamed so it does not shadow the `ds` parameter
        for d in datasets:
            # all of them should be in master branch
            eq_(d.repo.get_active_branch(), "master")
            # all of them should be clean, so sub should be installed in a
            # "version" as pointed by the super
            ok_(not d.repo.dirty)

    dest_ds = install(dest, source=src)

    # now we progress sub1 by adding sub2
    subsub2 = sub1.create('sub2')
    # and progress subsub2 forward to stay really thorough
    put_file_under_git(subsub2.path, 'file.dat', content="data")
    subsub2.save("added a file")  # above function does not commit

    # just installing a submodule -- apparently different code/logic
    # but also the same story should hold - we should install the version
    # pointed by the super, and stay all clean
    # (result was bound to an unused local `dest_sub1` before; binding dropped)
    dest_ds.install('sub1')
    check_consistent_installation(dest_ds)

    # So now we have source super-dataset "dirty" with sub1 progressed forward.
    # Our install should try to "retain" consistency of the installation
    # whenever possible.

    # install entire hierarchy without specifying dataset;
    # no filter, we want full report
    dest2_ds = install(dest2, source=src, recursive=True, result_filter=None)
    check_consistent_installation(dest2_ds[0])  # [1] is the subdataset

    # install entire hierarchy by first installing top level ds
    # and then specifying sub-dataset
    dest3_ds = install(dest3, source=src, recursive=False)
    # and then install both submodules recursively while pointing
    # to it based on dest3_ds
    dest3_ds.install('sub1', recursive=True)
    check_consistent_installation(dest3_ds)
# NOTE(review): this function is an exact duplicate of the
# test_install_consistent_state defined earlier in this file.  Python binds
# the name to the last definition, so this copy silently shadows the first
# one and only this copy ever runs.  Looks like a merge/paste artifact --
# one of the two should be removed (a code change, so only flagged here).
def test_install_consistent_state(src, dest, dest2, dest3):
    """Installed (sub)datasets should end up in the state recorded by the super."""
    # if we install a dataset, where sub-dataset "went ahead" in that branch,
    # while super-dataset was not yet updated (e.g. we installed super before)
    # then it is desired to get that default installed branch to get to the
    # position where previous location was pointing to.
    # It is indeed a mere heuristic which might not hold the assumption in some
    # cases, but it would work for most simple and thus mostly used ones
    ds1 = create(src)
    sub1 = ds1.create('sub1')

    def check_consistent_installation(ds):
        # Collect the super-dataset plus every installed subdataset
        # (paths wrapped back into Dataset objects).
        datasets = [ds] + list(map(Dataset,
                                   ds.subdatasets(recursive=True,
                                                  fulfilled=True,
                                                  result_xfm='paths')))
        assert len(datasets) == 2  # in this test
        for ds in datasets:
            # all of them should be in master branch
            eq_(ds.repo.get_active_branch(), "master")
            # all of them should be clean, so sub should be installed in a "version"
            # as pointed by the super
            ok_(not ds.repo.dirty)

    dest_ds = install(dest, source=src)
    # now we progress sub1 by adding sub2
    subsub2 = sub1.create('sub2')
    # and progress subsub2 forward to stay really thorough
    put_file_under_git(subsub2.path, 'file.dat', content="data")
    subsub2.save("added a file")  # above function does not commit

    # just installing a submodule -- apparently different code/logic
    # but also the same story should hold - we should install the version pointed
    # by the super, and stay all clean
    dest_sub1 = dest_ds.install('sub1')
    check_consistent_installation(dest_ds)

    # So now we have source super-dataset "dirty" with sub1 progressed forward
    # Our install should try to "retain" consistency of the installation
    # whenever possible.

    # install entire hierarchy without specifying dataset
    # no filter, we want full report
    dest2_ds = install(dest2, source=src, recursive=True, result_filter=None)
    check_consistent_installation(dest2_ds[0])  # [1] is the subdataset

    # install entire hierarchy by first installing top level ds
    # and then specifying sub-dataset
    dest3_ds = install(dest3, source=src, recursive=False)
    # and then install both submodules recursively while pointing
    # to it based on dest3_ds
    dest3_ds.install('sub1', recursive=True)
    check_consistent_installation(dest3_ds)
def test_initiate_dataset_new_create_warns(path):
    """Requesting a non-default backend should warn under DataLad's new create.

    The new create honors the datalad.repo.backend configuration rather than
    the crawler-specific backend argument, so a warning is expected and the
    file lands on the expected backend for that DataLad version.
    """
    # Probe which DataLad we are running against: when the old-style create
    # module is importable, the crawler's backend request is honored directly.
    try:
        from datalad.distribution import create
    except ImportError:
        # We are on a version of DataLad with the new create: it honors
        # datalad.repo.backend instead, and warns about the override.
        expect_warning, expected_backend = True, "MD5E"
    else:
        expect_warning, expected_backend = False, "SHA256E"

    path = opj(path, 'test')
    with swallow_logs(new_level=logging.WARNING) as cml:
        list(
            initiate_dataset('template', 'testdataset',
                             backend="SHA256E", path=path)())
    if expect_warning:
        assert_in("datalad.repo.backend", cml.out)

    # The annexed file should end up on whichever backend this DataLad uses.
    target = opj(path, 'test.dat')
    repo = put_file_under_git(target, content="test", annexed=True)
    eq_(repo.get_file_backend(target), expected_backend)