예제 #1
0
파일: test_repro.py 프로젝트: phdtanvir/dvc
    def test(self):
        """Check that a directory-copying stage is re-run after its data changes.

        A small script is generated that copies the tree given as its first
        argument to the path given as its second. It is registered as a
        stage that depends on ``self.DATA_DIR`` and the script itself, and
        produces the ``dir`` output. Immediately after creation nothing is
        expected to be reproduced; each modification made afterwards is
        expected to reproduce exactly one stage.
        """
        script_name = "dir_code.py"
        out_dir = "dir"

        # One-liner program: copytree(argv[1], argv[2]).
        script_source = (
            "import os; import sys; import shutil; "
            "shutil.copytree(sys.argv[1], sys.argv[2])"
        )
        with open(script_name, "w+") as script_file:
            script_file.write(script_source)

        stage = self._run(
            outs=[out_dir],
            deps=[self.DATA_DIR, script_name],
            cmd=f"python {script_name} {self.DATA_DIR} {out_dir}",
            name="copy-dir",
        )
        target = self._get_stage_target(stage)

        self.assertTrue(stage is not None)

        def expect_single_rerun():
            # Reproducing must yield exactly one stage, and a real one.
            rerun = self.dvc.reproduce(target)
            self.assertEqual(len(rerun), 1)
            self.assertTrue(rerun[0] is not None)

        # Nothing has been touched yet, so there is nothing to reproduce.
        unchanged = self.dvc.reproduce(target)
        self.assertEqual(len(unchanged), 0)

        # Append to self.DATA_SUB (presumably a file under self.DATA_DIR --
        # TODO confirm against the fixture) so the dependency changes.
        with open(self.DATA_SUB, "a") as data_file:
            data_file.write("add")
        expect_single_rerun()

        # Check that dvc indeed registers a changed output dir: move an
        # extra file into the stage output.
        shutil.move(self.BAR, out_dir)
        expect_single_rerun()

        # Check that dvc registers the mtime change for the directory
        # caused by adding a hard link next to the existing data file.
        System.hardlink(self.DATA_SUB, self.DATA_SUB + ".lnk")
        expect_single_rerun()
예제 #2
0
    def hardlink(self, from_info, to_info):
        """Hard-link ``to_info`` to ``from_info``, except for empty sources.

        When the source has size 0, a brand-new empty file is created at
        ``to_info`` instead of a link, and a debug message is logged.

        Rationale: datasets often contain many empty files. With the
        ``hardlink`` cache type they all share the same hash and would
        therefore all be linked to a single file in the cache, which can
        hit the filesystem's per-file link limit ("too many links").
        From https://en.wikipedia.org/wiki/Hard_link:

          * ext4 limits the number of hard links on a file to 65,000
          * Windows with NTFS has a limit of 1024 hard links on a file
        """
        source_is_empty = self.getsize(from_info) == 0
        if not source_is_empty:
            System.hardlink(from_info, to_info)
            return

        # Opening for writing and closing right away leaves an empty file.
        empty_file = self.open(to_info, "w")
        empty_file.close()

        debug = logger.debug
        debug(
            "Created empty file: {src} -> {dest}".format(
                src=str(from_info),
                dest=str(to_info),
            )
        )
예제 #3
0
 def hardlink(self, src, link):
     """Hard-link ``src`` and ``link`` and leave ``src`` owner-read-only.

     Logs the operation at debug level on the project logger, delegates
     the link creation to ``System.hardlink`` and finally sets the mode
     of ``src`` to ``stat.S_IREAD``.
     """
     log_debug = self.project.logger.debug
     message = "creating hardlink {} -> {}".format(src, link)
     log_debug(message)

     System.hardlink(src, link)

     # chmod replaces the whole mode, so only the owner-read bit remains.
     os.chmod(src, stat.S_IREAD)