def test(self):
    """Run ``python CODE FOO out`` as a stage and check the stage it produces.

    Verifies that the output file matches FOO, that the stage file was
    written, and that the stage's command, dependencies, outputs, lock
    flag and path reflect the arguments given to ``run``.
    """
    cmd = 'python {} {} {}'.format(self.CODE, self.FOO, 'out')
    deps = [self.FOO, self.CODE]
    outs = [os.path.join(self.dvc.root_dir, 'out')]
    outs_no_cache = []
    locked = False
    fname = os.path.join(self.dvc.root_dir, 'out.dvc')
    cwd = os.curdir

    self.dvc.add(self.FOO)
    stage = self.dvc.run(cmd=cmd,
                         deps=deps,
                         outs=outs,
                         outs_no_cache=outs_no_cache,
                         locked=locked,
                         fname=fname,
                         cwd=cwd)

    self.assertTrue(filecmp.cmp(self.FOO, 'out'))
    self.assertTrue(os.path.isfile(stage.path))
    self.assertEqual(stage.cmd, cmd)
    self.assertEqual(len(stage.deps), len(deps))
    self.assertEqual(len(stage.outs), len(outs + outs_no_cache))
    self.assertEqual(stage.outs[0].path, outs[0])
    self.assertEqual(stage.outs[0].md5, file_md5(self.FOO)[0])
    self.assertEqual(stage.locked, locked)
    # assertTrue(a, b) treats b as the failure message and passes for any
    # truthy a; the stage path has to be compared against fname explicitly.
    self.assertEqual(stage.path, fname)
def move_data_to_cache(self):
    """Register the data file under its md5 in the cache directory.

    Computes the md5 of the data file, records the cache file path derived
    from it, adds the data file to .gitignore, hardlinks it to the cache
    location when no file is there yet, makes the data file read-only and
    saves the local state file.
    """
    checksum = file_md5(self.data.relative)[0]
    self._cache_file = os.path.join(self.cache_dir_abs, checksum)
    self._git.modify_gitignore([self.data.relative])

    data_path = self.data.relative
    # presumably self.cache.relative is derived from self._cache_file set
    # above - verify against the class definition
    cache_path = self.cache.relative
    already_cached = os.path.isfile(cache_path)
    if not already_cached:
        System.hardlink(data_path, cache_path)

    os.chmod(data_path, stat.S_IREAD)
    LocalStateFile(self).save()
def update(self, md5=None):
    """Set this output's md5 and record it in the project state.

    When ``md5`` is falsy, the checksum is computed from the file at
    ``self.path``.

    Raises:
        OutputDoesNotExistError: a checksum is needed but the path is missing.
        OutputIsNotFileError: a checksum is needed but the path is not a
            regular file.
    """
    target = self.path
    self.md5 = md5

    if not self.md5:
        # isfile() is False for missing paths too, so tell the two
        # failure modes apart before raising.
        if not os.path.isfile(target):
            if not os.path.exists(target):
                raise OutputDoesNotExistError(target)
            raise OutputIsNotFileError(target)
        self.md5 = file_md5(target)[0]

    self.project.state.update(target, self.md5, self.mtime())
def _changed_md5(self):
    """Return True when the file is missing or its md5 differs from self.md5.

    The md5 stored in the project state is reused when the state's mtime
    equals the current mtime; otherwise the file is hashed again.
    """
    path = self.path
    if not os.path.exists(path):
        return True

    cached = self.project.state.get(path)
    state_is_fresh = cached and cached.mtime == self.mtime()
    current_md5 = cached.md5 if state_is_fresh else file_md5(path)[0]
    return self.md5 != current_md5
def test(self):
    """Add FOO and check the returned stage.

    The stage must be a ``Stage`` whose file exists, with one output, no
    dependencies, the locked flag set, no command, and an output md5 equal
    to the checksum taken from FOO before the add.
    """
    # Hash before adding so the expectation does not depend on what
    # ``add`` does to the file.
    expected_md5 = file_md5(self.FOO)[0]

    added = self.dvc.add(self.FOO)

    self.assertIsInstance(added, Stage)
    self.assertTrue(os.path.isfile(added.path))
    self.assertEqual(len(added.outs), 1)
    self.assertEqual(len(added.deps), 0)
    self.assertTrue(added.locked)
    self.assertEqual(added.cmd, None)
    self.assertEqual(added.outs[0].md5, expected_md5)
def update(self):
    """Refresh self.md5, reusing the project state when it is still current.

    When the state entry for the path has the same mtime as the file, its
    md5 is taken as-is (and logged at debug level). Otherwise the file is
    hashed and the state entry is updated with the new md5 and mtime.

    Raises:
        OutputDoesNotExistError: the path does not exist.
        OutputIsNotFileError: the path exists but is not a regular file.
    """
    path = self.path

    # isfile() is False for missing paths too, so tell the two failure
    # modes apart before raising.
    if not os.path.isfile(path):
        if not os.path.exists(path):
            raise OutputDoesNotExistError(path)
        raise OutputIsNotFileError(path)

    cached = self.project.state.get(path)
    if not (cached and cached.mtime == self.mtime()):
        self.md5 = file_md5(path)[0]
        self.project.state.update(path, self.md5, self.mtime())
        return

    cached_md5 = cached.md5
    self.project.logger.debug(
        '{} using md5 {} from state file'.format(path, cached_md5))
    self.md5 = cached_md5
def parse_deps_state(settings, deps, currdir=None):
    """Map each dependency to its md5.

    Data items get their md5 from ``StateFile.find_md5``; every other
    dependency is hashed from its path under the git directory.

    Args:
        settings: object providing ``path_factory`` and ``git``.
        deps: iterable of dependency paths.
        currdir: when truthy, keys are the dependency paths made relative
            to this directory; otherwise the paths are used unchanged.

    Returns:
        dict mapping dependency name to md5.
    """
    checksums = {}
    for dep in deps:
        if not settings.path_factory.is_data_item(dep):
            source_path = os.path.join(settings.git.git_dir_abs, dep)
            checksum = file_md5(source_path)[0]
        else:
            data_item = settings.path_factory.data_item(dep)
            checksum = StateFile.find_md5(data_item)

        key = os.path.relpath(dep, currdir) if currdir else dep
        checksums[key] = checksum
    return checksums
def create_file_fsck_state(self, dvc_path):
    """Build the FsckFile record for one project-relative path.

    The record carries the full path, the md5 of the file on disk (only
    when ``self.physical`` is set, else None), the checksum found in
    ``self.caches``, the project state entry, and one FsckFileDep per
    (stage, dep) pair registered for the path.
    """
    full_path = os.path.join(self.project.root_dir, dvc_path)
    actual_md5 = file_md5(full_path)[0] if self.physical else None
    cache_md5 = self.caches.get(dvc_path)
    state_entry = self.project.state.get(dvc_path)

    stage_deps = []
    for stage, dep in self.files_and_stages.get(dvc_path, []):
        stage_deps.append(FsckFileDep(stage.dvc_path, dep))

    return FsckFile(dvc_path, full_path, actual_md5, cache_md5, state_entry,
                    stage_deps)
def reproduce_dep(self, path, md5, recursive):
    """Return True when the dependency no longer matches the expected md5.

    Non-data items are hashed from their path under the git directory.
    Data items are compared against the checksum recorded by the stage
    that outputs them; with ``recursive`` set, that stage is reproduced
    first. The reason is logged whenever a change is detected.
    """
    if not self.settings.path_factory.is_data_item(path):
        source_md5 = file_md5(os.path.join(self.git.git_dir_abs, path))[0]
        if md5 == source_md5:
            return False
        self.log_repro_reason('source {} was changed'.format(path))
        return True

    producer = StateFile.find_by_output(self.settings, path)
    if recursive:
        upstream = ReproStage(self.settings, producer, self._recursive,
                              self._force)
        upstream.reproduce()

    # Reload the stage file so the comparison sees what is on disk now.
    producer = StateFile.load(producer.path)
    recorded_md5 = producer.out[os.path.relpath(path, producer.cwd)]
    if md5 == recorded_md5:
        return False

    self.log_repro_reason(
        'data item {} was changed - md5 sum doesn\'t match'.format(path))
    return True
def print_fsck(self, caches, dvc_files, files_and_stages):
    """Print a consistency report for each file in ``dvc_files``.

    For every file the report lists the checksum of the file on disk (only
    when ``self.args.physical`` is set), the checksum found in ``caches``,
    the checksum and mtime from the project state, the mtime on disk, and
    every (stage, dep) pair registered for the file. A ``!!!`` marker is
    appended where a checksum disagrees with the one from ``caches``.

    Args:
        caches: mapping of file path to the checksum of its cache file.
        dvc_files: iterable of project-relative file paths to report on.
        files_and_stages: mapping of file path to a list of (stage, dep)
            pairs.
    """
    for dvc_path in dvc_files:
        print(u'File {}:'.format(dvc_path))

        full_path = os.path.join(self.project.root_dir, dvc_path)

        if self.args.physical:
            md5 = file_md5(full_path)[0]
            print(u' Actual checksum: {}'.format(md5))
        else:
            md5 = None

        hardlink_md5 = caches.get(dvc_path)
        hardlink_msg = hardlink_md5 if hardlink_md5 else 'No cache file found'
        hardlink_error = (
            '!!!' if hardlink_md5 and md5 and md5 != hardlink_md5 else '')
        print(u' Hardlink to cache file: {} {}'.format(
            hardlink_msg, hardlink_error))

        state = self.project.state.get(dvc_path)
        # The state lookup may come back empty (the prints below already
        # allow for that), so never dereference it unguarded.
        state_md5 = state.md5 if state else None
        local_state_error = (
            '!!!'
            if state_md5 and hardlink_md5 and state_md5 != hardlink_md5
            else '')
        print(u' Local state checksum: {} {}'.format(
            state.md5 if state else '', local_state_error))
        print(u' Local state mtime: {}'.format(
            state.mtime if state else ''))

        if os.path.exists(full_path):
            mtime = os.path.getmtime(full_path)
        else:
            mtime = 'None'
        print(u' Actual mtime: {}'.format(mtime))

        for stage, dep in files_and_stages.get(dvc_path, []):
            stage_error = (
                '!!!'
                if dep.md5 and hardlink_md5 and dep.md5 != hardlink_md5
                else '')
            print(u' Stage file: {}'.format(stage.dvc_path))
            print(u' Type: {}'.format(type(dep).__name__))
            print(u' Checksum: {} {}'.format(dep.md5, stage_error))
            print(u' Use cache: {}'.format(str(dep.use_cache).lower()))
def import_cache(self, fname):
    """Move ``fname`` into the cache directory under its md5 as file name.

    The resulting cache path is stored in ``self._cache_file``.
    """
    checksum = file_md5(fname)[0]
    destination = os.path.join(self.cache_dir, checksum)
    self._cache_file = destination
    os.rename(fname, self._cache_file)
def _changed_md5(self):
    """Return True when the file's current md5 differs from self.md5."""
    recorded_md5 = self.md5
    current_md5 = file_md5(self.path)[0]
    return recorded_md5 != current_md5