def test(self):
    """Check that a stage copying a directory is re-run when it should be.

    A helper script that copies ``sys.argv[1]`` to ``sys.argv[2]`` with
    ``shutil.copytree`` is written to disk and registered as a stage whose
    dependencies are ``self.DATA_DIR`` and the script, and whose output is
    ``dir_name``. The test then verifies that ``self.dvc.reproduce``:

    * returns no stages when nothing has been touched since the run;
    * returns one stage after a file inside the dependency is modified;
    * returns one stage after the output directory is modified;
    * returns one stage after a hardlink is added next to ``self.DATA_SUB``.
    """
    dir_name = "dir"
    dir_code = "dir_code.py"

    with open(dir_code, "w+") as fd:
        fd.write(
            "import os; import sys; import shutil; "
            "shutil.copytree(sys.argv[1], sys.argv[2])"
        )

    stage = self._run(
        outs=[dir_name],
        deps=[self.DATA_DIR, dir_code],
        cmd=f"python {dir_code} {self.DATA_DIR} {dir_name}",
        name="copy-dir",
    )
    # Validate the stage before it is used to derive the target, so a
    # failed run is reported by the assertion itself.
    self.assertIsNotNone(stage)
    target = self._get_stage_target(stage)

    # Nothing has changed since the run above.
    stages = self.dvc.reproduce(target)
    self.assertEqual(len(stages), 0)

    # Modify a file that lives under the dependency.
    with open(self.DATA_SUB, "a") as fd:
        fd.write("add")

    stages = self.dvc.reproduce(target)
    self.assertEqual(len(stages), 1)
    self.assertIsNotNone(stages[0])

    # Check that dvc indeed registers changed output dir
    shutil.move(self.BAR, dir_name)
    stages = self.dvc.reproduce(target)
    self.assertEqual(len(stages), 1)
    self.assertIsNotNone(stages[0])

    # Check that dvc registers mtime change for the directory.
    System.hardlink(self.DATA_SUB, self.DATA_SUB + ".lnk")
    stages = self.dvc.reproduce(target)
    self.assertEqual(len(stages), 1)
    self.assertIsNotNone(stages[0])
def hardlink(self, from_info, to_info):
    """Hardlink ``to_info`` to ``from_info``, special-casing empty sources.

    When ``self.getsize(from_info)`` reports zero, no link is made; a new
    empty file is created at ``to_info`` instead and the fact is logged at
    debug level. Otherwise the call is delegated to ``System.hardlink``.

    Rationale: datasets frequently contain many empty files. They all share
    one hash and therefore one cache entry, so with the ``hardlink`` cache
    type every one of them would be linked to the same file and the
    filesystem's per-file link limit could be hit ("too many links").
    Per https://en.wikipedia.org/wiki/Hard_link:

    * ext4 limits the number of hard links on a file to 65,000
    * Windows with NTFS has a limit of 1024 hard links on a file
    """
    source_is_empty = self.getsize(from_info) == 0
    if not source_is_empty:
        System.hardlink(from_info, to_info)
        return

    # Create a fresh empty file rather than adding another link.
    self.open(to_info, "w").close()
    message = "Created empty file: {src} -> {dest}".format(
        src=str(from_info), dest=str(to_info)
    )
    logger.debug(message)
def hardlink(self, src, link):
    """Create ``link`` as a hardlink to ``src`` and restrict ``src``'s mode.

    The operation is announced on the project logger at debug level, the
    link is created through ``System.hardlink``, and afterwards the mode of
    ``src`` is set to ``stat.S_IREAD``.
    """
    log_debug = self.project.logger.debug
    log_debug("creating hardlink {} -> {}".format(src, link))

    System.hardlink(src, link)

    # NOTE(review): presumably this protects the shared inode from being
    # edited through either name; confirm against callers.
    read_only_mode = stat.S_IREAD
    os.chmod(src, read_only_mode)