def lock_files(self, files, target):
    """Lock or unlock the state file of every data item in *files*.

    Each entry is resolved with
    ``self.settings.path_factory.existing_data_item`` and its state file is
    loaded. An item that is already in the requested state only triggers a
    warning; otherwise its ``locked`` flag is set to *target* and the state
    is saved. An exception raised while handling one entry is logged and
    counted, and processing continues with the next entry.

    After the loop, if at least one entry failed and
    ``self.no_git_actions`` is falsy, an error is logged and
    ``self.not_committed_changes_warning()`` is called; in every other case
    ``self.commit_if_needed`` is called.

    Args:
        files: iterable of data item paths to process.
        target: truthy to lock, falsy to unlock.

    Returns:
        Always ``0``, even when some entries failed.
    """
    action = 'lock' if target else 'unlock'
    failures = 0

    for path in files:
        try:
            item = self.settings.path_factory.existing_data_item(path)
            state = StateFile.load(item.state.relative, self.settings)

            # Nothing to do when the item is already in the requested state.
            if state.locked and target:
                Logger.warn('Data item {} is already locked'.format(
                    item.data.relative))
                continue
            if not state.locked and not target:
                Logger.warn('Data item {} is already unlocked'.format(
                    item.data.relative))
                continue

            state.locked = target
            Logger.debug('Saving status file for data item {}'.format(
                item.data.relative))
            state.save()
            Logger.info('Data item {} was {}ed'.format(
                item.data.relative, action))
        except Exception as ex:
            # Best effort: one failing entry must not stop the others.
            failures += 1
            Logger.error('Unable to {} {}: {}'.format(action, path, ex))

    if failures > 0 and not self.no_git_actions:
        Logger.error(
            'Errors occurred. One or more repro cmd was not successful.')
        self.not_committed_changes_warning()
    else:
        self.commit_if_needed('DVC lock: {}'.format(' '.join(self.args)))

    return 0
def __init__(self, data_item, cmd_obj, globally_changed_files, recursive,
             force):
    """Prepare the reproduction of a single data item.

    Stores the arguments, checks that the data item path is a symlink,
    loads the item's state file and builds a shallow copy of the command's
    settings with the argv recorded in the state file applied to it.

    Args:
        data_item: data item to reproduce; ``data.relative`` and
            ``state.relative`` are read from it.
        cmd_obj: owning command object; ``git`` and ``settings`` are read
            from it.
        globally_changed_files: stored as ``self._globally_changed_files``.
        recursive: stored as ``self._recursive``.
        force: stored as ``self._force``.

    Raises:
        ReproError: if the data item is not a symlink, the state file
            cannot be loaded, or the state file has no argv.
    """
    self._data_item = data_item
    self.git = cmd_obj.git
    self._cmd_obj = cmd_obj
    self._globally_changed_files = globally_changed_files
    self._recursive = recursive
    self._force = force

    link_path = data_item.data.relative
    if not System.islink(link_path):
        raise ReproError('data item {} is not symlink'.format(
            data_item.data.relative))

    try:
        self._state = StateFile.load(data_item.state.relative, self.git)
    except Exception as ex:
        # Report any load failure as a ReproError that names the file.
        raise ReproError(
            'Error: state file "{}" cannot be loaded: {}'.format(
                data_item.state.relative, ex))

    # ``self.state`` is defined outside this block; presumably it exposes
    # the ``self._state`` loaded above -- TODO confirm.
    if not self.state.argv:
        raise ReproError(
            'Error: parameter {} is not defined in state file "{}"'.format(
                StateFile.PARAM_ARGV, data_item.state.relative))

    # NOTE(review): for an ordinary list/str this check cannot fire, since
    # an empty argv is already rejected above -- confirm the intended
    # minimum length.
    if len(self.state.argv) < 1:
        raise ReproError(
            'Error: reproducible cmd in state file "{}" is too short'.
            format(self.state.file))

    # Shallow copy, so set_args rebinds on the copy rather than on the
    # command's own settings object.
    settings = copy.copy(self._cmd_obj.settings)
    self._settings = settings
    settings.set_args(self.state.argv)
def is_repro_required(self, changed_files, data_item):
    """Decide whether *data_item* has to be reproduced.

    The checks run in this order, and the first one that matches decides:

    1. the state file is locked -> not required;
    2. a dependency check is due (``self._recursive`` is set, or the cache
       is missing) and ``were_dependencies_changed`` reports a change ->
       required;
    3. ``self._force`` is set -> required;
    4. the cache is missing -> required;
    5. ``were_sources_changed`` reports a change -> required.

    Args:
        changed_files: passed through to ``were_dependencies_changed``.
        data_item: item whose state file is loaded; ``data.dvc`` is passed
            to ``were_dependencies_changed``.

    Returns:
        ``True`` when reproduction is required, ``False`` otherwise.
    """
    if StateFile.load(data_item.state.relative, self._settings).locked:
        Logger.debug(
            u'Repro is not required for locked data item {}'.format(
                data_item.data.relative))
        return False

    check_dependencies = self._recursive
    if not check_dependencies and not self.is_cache_exists():
        # Without a cache file the dependencies are checked even in
        # non-recursive mode.
        check_dependencies = True
        Logger.info(
            u'Reproduction {}. Force dependency check since cache file is missing.'
            .format(self._data_item.data.relative))

    if check_dependencies and self.were_dependencies_changed(
            changed_files, data_item.data.dvc):
        self.log_repro_reason(u'input dependencies were changed')
        return True

    # Remaining triggers, evaluated lazily in their original order.
    if self._force:
        reason = u'it was forced'
    elif not self.is_cache_exists():
        reason = u'cache file is missing.'
    elif self.were_sources_changed(self._globally_changed_files):
        reason = u'sources were changed'
    else:
        return False

    self.log_repro_reason(reason)
    return True
def cache(self):
    """Return a ``Path`` for this item's file inside ``self.cache_dir``.

    When ``self._cache_file`` is set, the file name is that file's real
    path expressed relative to the cache directory. Otherwise the name is
    the ``md5`` attribute of the state loaded through ``StateFile.load``.
    """
    cache_dir = self.cache_dir

    if not self._cache_file:
        # Imported locally rather than at module level -- presumably to
        # avoid a circular import; verify before moving it.
        from dvc.state_file import StateFile
        file_name = StateFile.load(self, self._git).md5
    else:
        resolved = os.path.realpath(self._cache_file)
        file_name = os.path.relpath(resolved, cache_dir)

    return Path(os.path.join(cache_dir, file_name), self._git)
def collect_targets(self):
    """Collect state files changed by the last merge that need handling.

    Walks the names returned by
    ``self.git.get_last_merge_changed_files()`` and keeps those that are
    state files whose loaded state has a falsy ``cmd`` and a truthy
    ``locked``.

    Returns:
        List of the selected file names, in the order git reported them.
    """
    selected = []
    for changed in self.git.get_last_merge_changed_files():
        if StateFile._is_state_file(changed):
            state = StateFile.load(changed)
            if not state.cmd and state.locked:
                selected.append(changed)
    return selected
def checkout_targets(self, targets):
    """Restore pre-merge state files and check out their outputs.

    For every name in *targets* the file is checked out as it was before
    the last merge and its state is loaded. Each entry of ``state.out`` is
    joined with ``state.cwd`` and turned into a data item. All collected
    items are passed to ``CmdCheckout.checkout`` in one call, after which
    ``self.commit_if_needed`` is called with a message listing the targets.

    Args:
        targets: sequence of state file names (it is iterated and then
            joined into the commit message).
    """
    items = []
    for state_path in targets:
        self.git.checkout_file_before_last_merge(state_path)
        state = StateFile.load(state_path)
        items.extend(
            self.settings.path_factory.data_item(
                os.path.join(state.cwd, out))
            for out in state.out)

    CmdCheckout.checkout(items)

    self.commit_if_needed(
        'DVC merge files: {}'.format(' '.join(targets)))
def process_file(self, target):
    """Add *target* and its direct inputs and outputs to the graph.

    The data item for *target* becomes a node of ``self.g``. Every entry
    of the state's ``input_files`` gets a node and an edge pointing to the
    target; every entry of ``output_files`` other than the target itself
    gets a node and an edge coming from the target.

    Args:
        target: value passed to ``self._get_data_item``.
    """
    item = self._get_data_item(target)
    node = item.data.relative
    state = StateFile.load(item.state.relative, self.git)

    self.g.add_node(node)

    for source in state.input_files:
        self.g.add_node(source)
        self.g.add_edge(source, node)

    for produced in state.output_files:
        # The target may be listed among its own outputs; skip it so no
        # self-edge is added.
        if produced != node:
            self.g.add_node(produced)
            self.g.add_edge(node, produced)
def run(self):
    """Reproduce the requested targets and commit the result.

    Every entry of ``self.parsed_args.targets`` is resolved to a stage:
    entries recognised by ``StateFile._is_state_file`` are loaded
    directly, the others are looked up with ``StateFile.find_by_output``.
    Entries that resolve to a falsy stage are skipped. The collected
    stages are passed to ``self.repro_stages`` together with the
    recursion flag (the negation of ``single_item``) and the ``force``
    flag.

    Returns:
        Whatever ``self.commit_if_needed`` returns.
    """
    args = self.parsed_args
    recursive = not args.single_item

    stages = []
    for target in args.targets:
        if StateFile._is_state_file(target):
            stage = StateFile.load(target)
        else:
            stage = StateFile.find_by_output(self.settings, target)

        if stage:
            stages.append(stage)

    self.repro_stages(stages, recursive, args.force)

    names = [os.path.relpath(stage.path) for stage in stages]
    # Join the names so the commit message reads "DVC repro: a b" instead
    # of embedding the Python list repr ("DVC repro: ['a', 'b']").
    return self.commit_if_needed('DVC repro: {}'.format(' '.join(names)))
def reproduce_dep(self, path, md5, recursive):
    """Report whether the dependency *path* differs from the recorded *md5*.

    A path that is not a data item is treated as a source file: its
    current checksum (first element of ``file_md5``'s result) is compared
    with *md5*.

    For a data item, the stage producing it is looked up, reproduced first
    when *recursive* is truthy, then reloaded from ``stage.path``. The
    checksum recorded in ``stage.out`` for the path (relative to
    ``stage.cwd``) is compared with *md5*.

    Args:
        path: dependency path.
        md5: checksum the dependency is expected to have.
        recursive: truthy to reproduce the producing stage before
            comparing.

    Returns:
        ``True`` if the checksums differ (the reason is logged), else
        ``False``.
    """
    if not self.settings.path_factory.is_data_item(path):
        source_path = os.path.join(self.git.git_dir_abs, path)
        current_md5 = file_md5(source_path)[0]
        if md5 != current_md5:
            self.log_repro_reason('source {} was changed'.format(path))
            return True
        return False

    # NOTE(review): the stage found here is used without a None check;
    # another caller of find_by_output guards its result -- confirm it
    # cannot be falsy here.
    producer = StateFile.find_by_output(self.settings, path)
    if recursive:
        ReproStage(self.settings, producer, self._recursive,
                   self._force).reproduce()

    # Reload from disk before reading the recorded checksum.
    producer = StateFile.load(producer.path)
    recorded_md5 = producer.out[os.path.relpath(path, producer.cwd)]
    if md5 != recorded_md5:
        self.log_repro_reason(
            'data item {} was changed - md5 sum doesn\'t match'.format(
                path))
        return True

    return False
def __init__(self, data_item, cmd_obj):
    """Load the state of *data_item* and validate its reproduction command.

    Stores the arguments, loads the item's state file, copies the state's
    ``code_dependencies`` onto ``cmd_obj._code`` and keeps the normalized
    argv as ``self._repro_argv``.

    Args:
        data_item: data item to reproduce; ``state.relative`` is read
            from it.
        cmd_obj: owning command object; ``git`` is read from it and its
            ``_code`` attribute is overwritten.

    Raises:
        ReproError: if the state file has no normalized argv, or that argv
            has fewer than two elements. Exceptions raised by
            ``StateFile.load`` propagate unchanged.
    """
    self._data_item = data_item
    self.git = cmd_obj.git
    self._cmd_obj = cmd_obj

    self._state = StateFile.load(data_item.state.relative, self.git)

    # ``self.state`` is defined outside this block; presumably it exposes
    # the ``self._state`` loaded above -- TODO confirm.
    cmd_obj._code = self.state.code_dependencies

    argv = self.state.norm_argv
    if not argv:
        # Message typo fixed: "is nor defined" -> "is not defined".
        raise ReproError(
            'Error: parameter {} is not defined in state file "{}"'.format(
                StateFile.PARAM_NORM_ARGV, data_item.state.relative))

    if len(argv) < 2:
        raise ReproError(
            'Error: reproducible cmd in state file "{}" is too short'.
            format(self.state.file))

    self._repro_argv = argv