Ejemplo n.º 1
0
    def lock_files(self, files, target):
        """Lock or unlock the data items named in ``files``.

        Args:
            files: iterable of data item paths to process.
            target: requested lock state; truthy locks, falsy unlocks.

        Returns:
            Always 0, including when some items failed.

        An item that is already in the requested state only produces a
        warning. Any exception raised while handling one item is logged
        and counted, and processing continues with the next item.
        """
        action = 'lock' if target else 'unlock'
        failures = 0

        for path in files:
            try:
                item = self.settings.path_factory.existing_data_item(path)
                state = StateFile.load(item.state.relative, self.settings)
                name = item.data.relative

                if state.locked and target:
                    Logger.warn(
                        'Data item {} is already locked'.format(name))
                    continue

                if not (state.locked or target):
                    Logger.warn(
                        'Data item {} is already unlocked'.format(name))
                    continue

                # The state actually changes: persist it.
                state.locked = target
                Logger.debug(
                    'Saving status file for data item {}'.format(name))
                state.save()
                Logger.info('Data item {} was {}ed'.format(name, action))
            except Exception as ex:
                failures += 1
                Logger.error('Unable to {} {}: {}'.format(action, path, ex))

        # Commit unless something failed while git actions are enabled.
        if failures and not self.no_git_actions:
            Logger.error(
                'Errors occurred. One or more repro cmd was not successful.')
            self.not_committed_changes_warning()
        else:
            self.commit_if_needed('DVC lock: {}'.format(' '.join(self.args)))

        return 0
Ejemplo n.º 2
0
    def __init__(self, data_item, cmd_obj, globally_changed_files, recursive,
                 force):
        """Prepare the reproduction of ``data_item`` from its state file.

        Args:
            data_item: data item to reproduce; its ``data.relative`` path
                must be a symlink.
            cmd_obj: command object supplying ``git`` and ``settings``.
            globally_changed_files: stored as-is for later checks.
            recursive: stored as-is; recursion flag for reproduction.
            force: stored as-is; force flag for reproduction.

        Raises:
            ReproError: if the data file is not a symlink, the state file
                cannot be loaded, or the state file has no argv.
        """
        self._data_item = data_item
        self.git = cmd_obj.git
        self._cmd_obj = cmd_obj
        self._globally_changed_files = globally_changed_files
        self._recursive = recursive
        self._force = force

        if not System.islink(data_item.data.relative):
            raise ReproError('data item {} is not symlink'.format(
                data_item.data.relative))

        try:
            self._state = StateFile.load(data_item.state.relative, self.git)
        except Exception as ex:
            raise ReproError(
                'Error: state file "{}" cannot be loaded: {}'.format(
                    data_item.state.relative, ex))

        # NOTE(review): ``self.state`` is presumably an accessor for
        # ``self._state`` defined elsewhere in the class - confirm.
        argv = self.state.argv

        # A missing or empty argv is rejected here; this also covers the
        # former ``len(argv) < 1`` check, which could never trigger once
        # this test had passed.
        if not argv:
            raise ReproError(
                'Error: parameter {} is not defined in state file "{}"'.format(
                    StateFile.PARAM_ARGV, data_item.state.relative))

        # Work on a copy so the command object's settings stay untouched.
        self._settings = copy.copy(self._cmd_obj.settings)
        self._settings.set_args(argv)
Ejemplo n.º 3
0
    def is_repro_required(self, changed_files, data_item):
        """Decide whether ``data_item`` has to be reproduced.

        Args:
            changed_files: passed through to ``were_dependencies_changed``.
            data_item: data item whose state file and dvc path are checked.

        Returns:
            False for a locked item. Otherwise True as soon as one of
            these holds, checked in this order: dependencies changed
            (only checked when recursive or when the cache is missing),
            reproduction was forced, the cache is missing, sources
            changed. False if none holds.
        """
        state = StateFile.load(data_item.state.relative, self._settings)
        if state.locked:
            Logger.debug(
                u'Repro is not required for locked data item {}'.format(
                    data_item.data.relative))
            return False

        check_dependencies = self._recursive
        if not (check_dependencies or self.is_cache_exists()):
            # Without a cache file the dependencies must be inspected even
            # in non-recursive mode.
            check_dependencies = True
            Logger.info(
                u'Reproduction {}. Force dependency check since cache file is missing.'
                .format(self._data_item.data.relative))

        if check_dependencies and self.were_dependencies_changed(
                changed_files, data_item.data.dvc):
            self.log_repro_reason(u'input dependencies were changed')
            return True

        if self._force:
            reason = u'it was forced'
        elif not self.is_cache_exists():
            reason = u'cache file is missing.'
        elif self.were_sources_changed(self._globally_changed_files):
            reason = u'sources were changed'
        else:
            return False

        self.log_repro_reason(reason)
        return True
Ejemplo n.º 4
0
    def cache(self):
        """Return a ``Path`` for this item's file inside the cache dir.

        When ``self._cache_file`` is set, the file name is its resolved
        real path taken relative to the cache directory. Otherwise the
        ``md5`` value loaded from the state file is used as the file name.
        """
        base_dir = self.cache_dir

        if not self._cache_file:
            # Imported here rather than at module level, as in the
            # original code.
            from dvc.state_file import StateFile
            relative_name = StateFile.load(self, self._git).md5
        else:
            resolved = os.path.realpath(self._cache_file)
            relative_name = os.path.relpath(resolved, base_dir)

        return Path(os.path.join(base_dir, relative_name), self._git)
Ejemplo n.º 5
0
    def collect_targets(self):
        """Collect state files changed by the last merge that need checkout.

        Returns:
            List of file names reported by
            ``git.get_last_merge_changed_files()`` that are state files
            whose loaded state has no ``cmd`` and is locked.
        """
        def _is_locked_without_cmd(fname):
            # Only state files are loaded; everything else is skipped.
            if not StateFile._is_state_file(fname):
                return False
            state = StateFile.load(fname)
            return not state.cmd and state.locked

        return [fname
                for fname in self.git.get_last_merge_changed_files()
                if _is_locked_without_cmd(fname)]
Ejemplo n.º 6
0
    def checkout_targets(self, targets):
        """Restore the given state files and check out their outputs.

        For each state file in ``targets``: check out its version from
        before the last merge, load it, and build a data item for every
        entry of its ``out`` (joined onto the state's ``cwd``). All data
        items are then checked out together and the change is committed
        if needed.

        Args:
            targets: state file names to process.
        """
        items = []
        for state_path in targets:
            self.git.checkout_file_before_last_merge(state_path)
            state = StateFile.load(state_path)
            items.extend(
                self.settings.path_factory.data_item(
                    os.path.join(state.cwd, out))
                for out in state.out)

        CmdCheckout.checkout(items)

        self.commit_if_needed(
            'DVC merge files: {}'.format(' '.join(targets)))
Ejemplo n.º 7
0
    def process_file(self, target):
        """Add ``target`` and its state-file inputs/outputs to ``self.g``.

        The target's relative data path becomes a node. Every input file
        gets a node and an edge input -> target; every output file other
        than the target itself gets a node and an edge target -> output.

        Args:
            target: passed to ``self._get_data_item`` to resolve the item.
        """
        item = self._get_data_item(target)
        node = item.data.relative
        state = StateFile.load(item.state.relative, self.git)

        graph = self.g
        graph.add_node(node)

        for source in state.input_files:
            graph.add_node(source)
            graph.add_edge(source, node)

        for produced in state.output_files:
            # The target itself is already a node; avoid a self-edge.
            if produced != node:
                graph.add_node(produced)
                graph.add_edge(node, produced)
Ejemplo n.º 8
0
    def run(self):
        """Reproduce the stages behind the requested targets and commit.

        Each target in ``parsed_args.targets`` is resolved to a stage:
        state files are loaded directly, anything else is looked up with
        ``StateFile.find_by_output``. Targets that resolve to a falsy
        stage are skipped.

        Returns:
            Whatever ``commit_if_needed`` returns.
        """
        recursive = not self.parsed_args.single_item
        stages = []

        for target in self.parsed_args.targets:
            if StateFile._is_state_file(target):
                stage = StateFile.load(target)
            else:
                stage = StateFile.find_by_output(self.settings, target)

            if stage:
                stages.append(stage)

        self.repro_stages(stages, recursive, self.parsed_args.force)

        names = [os.path.relpath(stage.path) for stage in stages]
        # Space-join the names so the commit message lists plain paths
        # instead of a Python list repr.
        return self.commit_if_needed(
            'DVC repro: {}'.format(' '.join(names)))
Ejemplo n.º 9
0
    def reproduce_dep(self, path, md5, recursive):
        """Report whether dependency ``path`` differs from ``md5``.

        Args:
            path: dependency path.
            md5: checksum the dependency is expected to have.
            recursive: when truthy, a data-item dependency's own stage is
                reproduced before the comparison.

        Returns:
            True if the checksum no longer matches, otherwise False. A
            mismatch is also reported through ``log_repro_reason``.
        """
        if self.settings.path_factory.is_data_item(path):
            stage = StateFile.find_by_output(self.settings, path)
            if recursive:
                ReproStage(self.settings, stage, self._recursive,
                           self._force).reproduce()

            # Load the state file again before reading the recorded
            # checksum (the stage may just have been reproduced above).
            reloaded = StateFile.load(stage.path)
            recorded = reloaded.out[os.path.relpath(path, reloaded.cwd)]
            if md5 != recorded:
                self.log_repro_reason(
                    'data item {} was changed - md5 sum doesn\'t match'.format(
                        path))
                return True
            return False

        # Not a data item: compare against the md5 of the file itself.
        current = file_md5(os.path.join(self.git.git_dir_abs, path))[0]
        if md5 != current:
            self.log_repro_reason('source {} was changed'.format(path))
            return True
        return False
Ejemplo n.º 10
0
    def __init__(self, data_item, cmd_obj):
        """Load the state of ``data_item`` and validate its command line.

        Args:
            data_item: data item whose state file is loaded.
            cmd_obj: command object supplying ``git``; its ``_code``
                attribute is overwritten with the state's code
                dependencies.

        Raises:
            ReproError: if the state file has no normalized argv or the
                argv has fewer than two elements.
        """
        self._data_item = data_item
        self.git = cmd_obj.git
        self._cmd_obj = cmd_obj
        self._state = StateFile.load(data_item.state.relative, self.git)

        # NOTE(review): ``self.state`` is presumably an accessor for
        # ``self._state`` defined elsewhere in the class - confirm.
        cmd_obj._code = self.state.code_dependencies

        argv = self.state.norm_argv

        if not argv:
            raise ReproError(
                'Error: parameter {} is not defined in state file "{}"'.format(
                    StateFile.PARAM_NORM_ARGV, data_item.state.relative))
        if len(argv) < 2:
            raise ReproError(
                'Error: reproducible cmd in state file "{}" is too short'.
                format(self.state.file))

        self._repro_argv = argv