Example #1
File: local.py Project: tonyle9/dvc
    def checkout(self, path_info, checksum_info):
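        # Restore the workspace copy from the local cache: look the file
        # up by md5 and link the cached copy back into place.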
        path = path_info['path']
        md5 = checksum_info.get(self.PARAM_MD5, None)
        cache = self.get(md5)

        if not cache:
            Logger.warn('No cache info for \'{}\'. Skipping checkout.'.format(os.path.relpath(path)))
            return

        if self.changed(md5):
            msg = u'Cache \'{}\' not found. File \'{}\' won\'t be created.'
            Logger.warn(msg.format(md5, os.path.relpath(path)))
            remove(path)
            return

        if os.path.exists(path):
            msg = u'Data \'{}\' exists. Removing before checkout'
            Logger.debug(msg.format(os.path.relpath(path)))
            remove(path)

        msg = u'Checking out \'{}\' with cache \'{}\''
        Logger.debug(msg.format(os.path.relpath(path), md5))

        if not self.is_dir_cache(cache):
            self.link(cache, path)
            self.link_state.update(path)
            return

        # Create dir separately so that dir is created
        # even if there are no files in it
        if not os.path.exists(path):
            os.makedirs(path)

        for entry in self.load_dir_cache(md5):
            entry_md5 = entry[self.PARAM_MD5]
            c = self.get(entry_md5)
            relpath = entry[self.PARAM_RELPATH]
            p = os.path.join(path, relpath)
            self.link(c, p)
        self.link_state.update(path)
Example #2
    def push(self, path):
        """ push, aws version """

        aws_key = self.cache_file_key(path)
        bucket = self._get_bucket_aws(self.storage_bucket)
        key = bucket.get_key(aws_key)
        if key:
            Logger.debug(
                'File already uploaded to the cloud. Checksum validation...')

            if self._cmp_checksum(key, path):
                Logger.debug('File checksum matches. No uploading is needed.')
                return path

            Logger.debug('Checksum mismatch. Re-uploading is required.')

        key = bucket.new_key(aws_key)

        try:
            self._push_multipart(key, path)
        except Exception as exc:
            Logger.error('Failed to upload "{}": {}'.format(path, exc))
            return None

        progress.finish_target(os.path.basename(path))

        return path
Example #3
    def push(self, data_item):
        """ push, gcp version """

        bucket = self._get_bucket_gc(self.storage_bucket)
        blob_name = self.cache_file_key(data_item.resolved_cache.dvc)
        name = os.path.basename(data_item.resolved_cache.dvc)

        blob = bucket.get_blob(blob_name)
        if blob is not None and blob.exists():
            if self._cmp_checksum(blob, data_item.resolved_cache.dvc):
                Logger.debug('checksum %s matches.  Skipping upload' %
                             data_item.cache.relative)
                return data_item
            Logger.debug('checksum %s mismatch.  re-uploading' %
                         data_item.cache.relative)

        # same as in _import
        progress.update_target(name, 0, None)

        blob = bucket.blob(blob_name)
        blob.upload_from_filename(data_item.resolved_cache.relative)

        progress.finish_target(name)
        Logger.debug('uploading %s completed' %
                     data_item.resolved_cache.relative)

        return data_item
Example #4
    def _import(self, bucket_name, key_name, fname, data_item):
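        # Download a cache file from S3 into a temporary file (with a
        # resumable handler), then rename it into its final place.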

        bucket = self._get_bucket_aws(bucket_name)

        tmp_file = self.tmp_file(fname)
        name = os.path.basename(fname)
        key = bucket.get_key(key_name)
        if not key:
            Logger.error(
                'File "{}" does not exist in the cloud'.format(key_name))
            return None

        if self._cmp_checksum(key, fname):
            Logger.debug('File "{}" matches with "{}".'.format(
                fname, key_name))
            return data_item

        Logger.debug('Downloading cache file from S3 "{}/{}" to "{}"'.format(
            bucket.name, key_name, fname))

        res_h = ResumableDownloadHandler(
            tracker_file_name=self._download_tracker(tmp_file), num_retries=10)
        try:
            key.get_contents_to_filename(tmp_file,
                                         cb=create_cb(name),
                                         res_download_handler=res_h)
            os.rename(tmp_file, fname)
        except Exception as exc:
            Logger.error('Failed to download "{}": {}'.format(key_name, exc))
            return None

        progress.finish_target(name)
        Logger.debug('Downloading completed')

        return data_item
Example #5
    def create_state_files(self, targets, lock):
        """
        Create state files for all targets.
        """
        for t in targets:
            orig_target, processed_data_item = t
            input, data_item = orig_target
            output = data_item.data.relative

            if processed_data_item is None:
                Logger.debug(
                    'Skipping creating state file for failed import {}'.format(
                        data_item.state.relative))
                continue

            Logger.debug('Creating symlink {} --> {}'.format(
                data_item.symlink_file, data_item.data.relative))
            System.symlink(data_item.symlink_file, data_item.data.relative)

            state_file = StateFile(StateFile.COMMAND_IMPORT_FILE,
                                   data_item.state.relative,
                                   self.settings,
                                   argv=[input, output],
                                   input_files=[],
                                   output_files=[output],
                                   lock=lock)
            state_file.save()
            Logger.debug('State file "{}" was created'.format(
                data_item.state.relative))
Example #6
    def run_command(self,
                    cmd_args,
                    data_items_from_args,
                    not_data_items_from_args,
                    stdout=None,
                    stderr=None,
                    shell=False):
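        # Execute the user command, detect which data files it changed,
        # move those outputs to cache, and record a state file for each.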
        Logger.debug(
            'Run command with args: {}. Data items from args: {}. stdout={}, stderr={}, shell={}'
            .format(' '.join(cmd_args),
                    ', '.join([x.data.dvc for x in data_items_from_args]),
                    stdout, stderr, shell))

        repo_change = RepositoryChange(cmd_args,
                                       self.settings,
                                       stdout,
                                       stderr,
                                       shell=shell)

        if not self.no_git_actions and not self._validate_file_states(
                repo_change):
            self.remove_new_files(repo_change)
            raise RunError('Errors occurred.')

        output_set = set(self.declaration_output_data_items +
                         repo_change.changed_data_items)
        output_files_dvc = [x.data.dvc for x in output_set]

        input_set = set(data_items_from_args +
                        self.declaration_input_data_items) - output_set
        input_files_dvc = [x.data.dvc for x in input_set]

        code_dependencies_dvc = self.git.abs_paths_to_dvc(
            self.code_dependencies + not_data_items_from_args)

        result = []
        for data_item in repo_change.changed_data_items:
            Logger.debug(
                'Move output file "{}" to cache dir "{}" and create a symlink'.
                format(data_item.data.relative, data_item.cache.relative))
            data_item.move_data_to_cache()

            Logger.debug('Create state file "{}"'.format(
                data_item.state.relative))

            state_file = StateFile(StateFile.COMMAND_RUN,
                                   data_item.state.relative,
                                   self.settings,
                                   input_files_dvc,
                                   output_files_dvc,
                                   code_dependencies_dvc,
                                   argv=cmd_args,
                                   lock=self.lock,
                                   stdout=self._stdout_to_dvc(stdout),
                                   stderr=self._stdout_to_dvc(stderr),
                                   shell=shell)
            state_file.save()
            result.append(state_file)

        return result
Example #7
File: ssh.py Project: pspeter/dvc
    def upload(self, from_infos, to_infos, names=None):
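        # Upload local files over a single SFTP session, creating the
        # remote directories first with 'mkdir -p'.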
        names = self._verify_path_args(to_infos, from_infos, names)

        ssh = self.ssh(host=to_infos[0]['host'],
                       user=to_infos[0]['user'],
                       port=to_infos[0]['port'])
        sftp = ssh.open_sftp()

        for from_info, to_info, name in zip(from_infos, to_infos, names):
            if to_info['scheme'] != 'ssh':
                raise NotImplementedError

            if from_info['scheme'] != 'local':
                raise NotImplementedError

            Logger.debug("Uploading '{}' to '{}/{}'".format(from_info['path'],
                                                            to_info['host'],
                                                            to_info['path']))

            if not name:
                name = os.path.basename(from_info['path'])

            dname = posixpath.dirname(to_info['path'])
            self._exec(ssh, 'mkdir -p {}'.format(dname))

            try:
                sftp.put(from_info['path'],
                         to_info['path'],
                         callback=create_cb(name))
            except Exception as exc:
                msg = "Failed to upload '{}' to '{}/{}'"
                Logger.error(msg.format(from_info['path'],
                                        to_info['host'],
                                        to_info['path'], exc))
                continue

            progress.finish_target(name)

        sftp.close()
        ssh.close()
Example #8
    def sync_to_cloud(self, data_item):
        """ sync_to_cloud, aws version """

        aws_key = self.cache_file_key(data_item.resolved_cache.dvc)
        bucket = self._get_bucket_aws()
        key = bucket.get_key(aws_key)
        if key:
            Logger.debug(
                'File already uploaded to the cloud. Checksum validation...')

            md5_cloud = key.etag[1:-1]
            md5_local = file_md5(data_item.resolved_cache.dvc)[0]
            if md5_cloud == md5_local:
                Logger.debug('File checksum matches. No uploading is needed.')
                return

            Logger.debug('Checksum mismatch. Re-uploading is required.')

        key = bucket.new_key(aws_key)

        try:
            key.set_contents_from_filename(
                data_item.resolved_cache.relative,
                cb=create_cb(data_item.resolved_cache.relative))
        except Exception as exc:
            Logger.error('Failed to upload "{}": {}'.format(
                data_item.resolved_cache.relative, exc))
            return

        progress.finish_target(
            os.path.basename(data_item.resolved_cache.relative))
Example #9
    def create_state_files(self, targets):
        """
        Create state files for all targets.
        """
        for data_item in targets:
            Logger.debug('Creating state file for {}'.format(
                data_item.data.relative))

            fname = os.path.basename(data_item.data.relative +
                                     StateFile.STATE_FILE_SUFFIX)
            out = StateFile.parse_deps_state(self.settings,
                                             [data_item.data.relative],
                                             currdir=os.path.curdir)
            state_file = StateFile(fname=fname,
                                   cmd=None,
                                   out=out,
                                   out_git=[],
                                   deps=[],
                                   locked=True)
            state_file.save()
            Logger.debug('State file "{}" was created'.format(
                data_item.state.relative))
Example #10
    def link(self, src, link):
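        # Try each candidate cache link type in order and stop at the
        # first one that succeeds; fail only if all of them fail.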
        dname = os.path.dirname(link)
        if not os.path.exists(dname):
            os.makedirs(dname)

        if self.cache_type is not None:
            types = [self.cache_type]
        else:
            types = self.CACHE_TYPES

        for typ in types:
            try:
                msg = u'Checking out \'{}\' with cache \'{}\''
                Logger.debug(msg.format(os.path.relpath(src), os.path.relpath(link)))
                self.CACHE_TYPE_MAP[typ](src, link)
                self.link_state.update(link)
                return
            except Exception as exc:
                msg = 'Cache type \'{}\' is not supported'.format(typ)
                Logger.debug(msg)
                if typ == types[-1]:
                    raise DvcException(msg, cause=exc)
Example #11
    def download(self,
                 from_infos,
                 to_infos,
                 no_progress_bar=False,
                 names=None):
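        # Download each blob into a temporary file and rename it into
        # place only if the transfer succeeds.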
        names = self._verify_path_args(from_infos, to_infos, names)

        for to_info, from_info, name in zip(to_infos, from_infos, names):
            if from_info['scheme'] != self.scheme:
                raise NotImplementedError

            if to_info['scheme'] != 'local':
                raise NotImplementedError

            bucket = from_info['bucket']
            key = from_info['key']

            Logger.debug("Downloading '{}/{}' to '{}'".format(
                bucket, key, to_info['path']))

            tmp_file = self.tmp_file(to_info['path'])
            if not name:
                name = os.path.basename(to_info['path'])

            cb = None if no_progress_bar else Callback(name)

            self._makedirs(to_info['path'])

            try:
                self.blob_service.get_blob_to_path(
                    bucket, key, tmp_file, progress_callback=cb)
            except Exception as exc:
                msg = "Failed to download '{}/{}'".format(bucket, key)
                Logger.warn(msg, exc)
            else:
                os.rename(tmp_file, to_info['path'])

                if not no_progress_bar:
                    progress.finish_target(name)
Example #12
    def sync_to_cloud(self, data_item):
        """ sync_to_cloud, gcp version """

        bucket = self._get_bucket_gc()
        blob_name = self.cache_file_key(data_item.resolved_cache.dvc)

        blob = bucket.get_blob(blob_name)
        if blob is not None and blob.exists():
            b64_encoded_md5 = base64.b64encode(
                file_md5(data_item.resolved_cache.dvc)[1])

            if blob.md5_hash == b64_encoded_md5:
                Logger.debug('checksum %s matches.  Skipping upload' %
                             data_item.cache.relative)
                return
            Logger.debug('checksum %s mismatch.  re-uploading' %
                         data_item.cache.relative)

        blob = bucket.blob(blob_name)
        blob.upload_from_filename(data_item.resolved_cache.relative)
        Logger.info('uploading %s completed' %
                    data_item.resolved_cache.relative)
Example #13
    def ssh(self, host=None, user=None, port=None):
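        # Open a paramiko SSH connection, prompting for a password only
        # if ask_password is set and no password is cached yet.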
        msg = "Establishing ssh connection with '{}' " \
              "through port '{}' as user '{}'"
        Logger.debug(msg.format(host, port, user))

        ssh = paramiko.SSHClient()

        ssh.load_system_host_keys()
        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())

        if self.ask_password and self.password is None:
            msg = "host '{}' port '{}' user '{}'".format(host, port, user)
            self.password = self.project.prompt.prompt_password(msg)

        ssh.connect(host,
                    username=user,
                    port=port,
                    key_filename=self.keyfile,
                    timeout=self.timeout,
                    password=self.password)

        return ssh
Example #14
    def download_target(self, target):
        """
        Download single target from url or from local path.
        """
        input = target[0]
        output = target[2].cache.relative

        if self.is_url(input):
            Logger.debug("Downloading {} -> {}.".format(input, output))
            self.download_file(input, output)
            Logger.debug("Done downloading {} -> {}.".format(input, output))
        else:
            Logger.debug("Copying {} -> {}".format(input, output))
            self.copy_file(input, output)
            Logger.debug("Dony copying {} -> {}".format(input, output))
Example #15
    def upload(self, paths, path_infos, names=None):
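        # Variant of the SFTP upload that validates its arguments inline
        # with asserts instead of via a helper.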
        assert isinstance(paths, list)
        assert isinstance(path_infos, list)
        assert len(paths) == len(path_infos)
        if not names:
            names = len(paths) * [None]
        else:
            assert isinstance(names, list)
            assert len(names) == len(paths)

        ssh = self.ssh(host=path_infos[0]['host'], user=path_infos[0]['user'])
        sftp = ssh.open_sftp()

        for path, path_info, name in zip(paths, path_infos, names):
            if path_info['scheme'] != 'ssh':
                raise NotImplementedError

            Logger.debug("Uploading '{}' to '{}/{}'".format(
                path, path_info['host'], path_info['path']))

            if not name:
                name = os.path.basename(path)

            dname = posixpath.dirname(path_info['path'])
            self._exec(ssh, 'mkdir -p {}'.format(dname))

            try:
                sftp.put(path, path_info['path'], callback=create_cb(name))
            except Exception as exc:
                Logger.error(
                    "Failed to upload '{}' to '{}/{}'".format(
                        path, path_info['host'], path_info['path']), exc)
                continue

            progress.finish_target(name)

        sftp.close()
        ssh.close()
Example #16
    def check(self):
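        # Throttle update checks: the updater file's ctime records the
        # last check, so the network request runs at most once per TIMEOUT.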
        current = VERSION_BASE

        if os.getenv('CI'):
            return

        if os.path.isfile(self.updater_file):
            ctime = os.path.getctime(self.updater_file)
            if time.time() - ctime < self.TIMEOUT:
                msg = '{} is not old enough to check for updates'
                Logger.debug(msg.format(self.UPDATER_FILE))
                return

            os.unlink(self.updater_file)

        Logger.info('Checking for updates...')

        try:
            r = requests.get(self.URL, timeout=self.TIMEOUT_GET)
            j = r.json()
            latest = j['version']
            open(self.updater_file, 'w+').close()
        except Exception as exc:
            msg = 'Failed to obtain latest version: {}'.format(str(exc))
            Logger.debug(msg)
            return

        l_major, l_minor, l_patch = [int(x) for x in latest.split('.')]
        c_major, c_minor, c_patch = [int(x) for x in current.split('.')]

        # Compare as tuples so that, e.g., 0.10.0 counts as newer than 0.9.9
        if (l_major, l_minor, l_patch) <= (c_major, c_minor, c_patch):
            return

        msg = 'You are using dvc version {}, however version {} is ' \
              'available. Consider upgrading.'
        Logger.warn(msg.format(current, latest))
Example #17
    def _save_dir(self, path_info):
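        # Save every file inside the directory to cache, then store the
        # directory listing itself as a JSON cache entry.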
        path = path_info['path']
        md5 = self.state.update(path)
        cache = self.get(md5)
        dname = os.path.dirname(cache)
        dir_info = self.state.collect_dir(path)

        for entry in dir_info:
            relpath = entry[State.PARAM_RELPATH]
            p = os.path.join(path, relpath)

            self._save_file({'scheme': 'local', 'path': p})

        if not os.path.isdir(dname):
            os.makedirs(dname)

        Logger.debug(u'Saving directory \'{}\' to \'{}\''.format(os.path.relpath(path),
                                                                 os.path.relpath(cache)))

        with open(cache, 'w+') as fd:
            json.dump(dir_info, fd, sort_keys=True)

        return {self.PARAM_MD5: md5}
Example #18
    def link(self, cache, path):
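        # Try the configured cache types one by one, dropping any type
        # that fails so later calls won't retry it.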
        assert os.path.isfile(cache)

        dname = os.path.dirname(path)
        if not os.path.exists(dname):
            os.makedirs(dname)

        i = len(self.cache_types)
        while i > 0:
            try:
                self.CACHE_TYPE_MAP[self.cache_types[0]](cache, path)
                msg = "Created '{}': {} -> {}"
                Logger.info(
                    msg.format(self.cache_types[0], os.path.relpath(cache),
                               os.path.relpath(path)))
                return
            except DvcException as exc:
                msg = 'Cache type \'{}\' is not supported: {}'
                Logger.debug(msg.format(self.cache_types[0], str(exc)))
                del self.cache_types[0]
                i -= 1

        raise DvcException('No possible cache types left to try out.')
Example #19
    def create_state_files(self, targets, lock):
        """
        Create state files for all targets.
        """
        for t in targets:
            input = t[0]
            output = t[1]
            data_item = t[2]

            Logger.debug('Creating symlink {} --> {}'.format(
                data_item.symlink_file, data_item.data.relative))
            System.symlink(data_item.symlink_file, data_item.data.relative)

            state_file = StateFile(StateFile.COMMAND_IMPORT_FILE,
                                   data_item.state.relative,
                                   self.settings,
                                   argv=[input, output],
                                   input_files=[],
                                   output_files=[output],
                                   lock=lock)
            state_file.save()
            Logger.debug('State file "{}" was created'.format(
                data_item.state.relative))
Example #20
    def git_dir(self):
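        # Return the repository root from 'git rev-parse --show-toplevel',
        # caching the result after the first call.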
        if self._git_dir:
            return self._git_dir

        try:
            Logger.debug(
                '[dvc-git] Getting git directory. Command: git rev-parse --show-toplevel'
            )
            code, out, err = Executor.exec_cmd(
                ['git', 'rev-parse', '--show-toplevel'])

            if code != 0:
                raise ExecutorError(
                    '[dvc-git] Git directory command error - {}'.format(err))
            Logger.debug('[dvc-git] Getting git directory. Success.')

            self._git_dir = out
            return self._git_dir
        except ExecutorError:
            raise
        except Exception as e:
            raise ExecutorError('Unable to run git command: {}'.format(e))
Example #21
    def download(self, path_infos, paths, no_progress_bar=False, names=None):
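        # Download each remote file over SFTP into a temporary file and
        # rename it into place once the transfer succeeds.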
        assert isinstance(paths, list)
        assert isinstance(path_infos, list)
        assert len(paths) == len(path_infos)
        if not names:
            names = len(paths) * [None]
        else:
            assert isinstance(names, list)
            assert len(names) == len(paths)

        ssh = self.ssh(host=path_infos[0]['host'], user=path_infos[0]['user'])

        for path, path_info, name in zip(paths, path_infos, names):
            if path_info['scheme'] != 'ssh':
                raise NotImplementedError

            Logger.debug("Downloading '{}/{}' to '{}'".format(
                path_info['host'], path_info['path'], path))
            if not name:
                name = os.path.basename(path)

            self._makedirs(path)
            tmp_file = self.tmp_file(path)
            try:
                ssh.open_sftp().get(path_info['path'],
                                    tmp_file,
                                    callback=create_cb(name))
            except Exception as exc:
                Logger.error(
                    "Failed to download '{}/{}' to '{}'".format(
                        path_info['host'], path_info['path'], path), exc)
                continue

            os.rename(tmp_file, path)
            progress.finish_target(name)

        ssh.close()
Example #22
    def upload(self, from_infos, to_infos, names=None):
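        # For the local "remote", uploading is just a file copy between
        # two local paths.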
        names = self._verify_path_args(to_infos, from_infos, names)

        for from_info, to_info, name in zip(from_infos, to_infos, names):
            if to_info['scheme'] != 'local':
                raise NotImplementedError

            if from_info['scheme'] != 'local':
                raise NotImplementedError

            Logger.debug("Uploading '{}' to '{}'".format(
                from_info['path'], to_info['path']))

            if not name:
                name = os.path.basename(from_info['path'])

            self._makedirs(to_info['path'])

            try:
                copyfile(from_info['path'], to_info['path'], name=name)
            except Exception as exc:
                msg = "Failed to upload '{}' tp '{}'"
                Logger.error(msg.format(from_info['path'], to_info['path']),
                             exc)
Example #23
    def upload(self, paths, path_infos, names=None):
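        # Upload files to S3 via boto3, reporting progress through a
        # per-file callback.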
        assert isinstance(paths, list)
        assert isinstance(path_infos, list)
        assert len(paths) == len(path_infos)
        if not names:
            names = len(paths) * [None]
        else:
            assert isinstance(names, list)
            assert len(names) == len(paths)

        session = boto3.session.Session()
        s3 = session.client('s3')

        for path, path_info, name in zip(paths, path_infos, names):
            if path_info['scheme'] != 's3':
                raise NotImplementedError

            Logger.debug("Uploading '{}' to '{}/{}'".format(
                path, path_info['bucket'], path_info['key']))

            if not name:
                name = os.path.basename(path)

            total = os.path.getsize(path)
            cb = Callback(name, total)

            try:
                s3.upload_file(path,
                               path_info['bucket'],
                               path_info['key'],
                               Callback=cb)
            except Exception as exc:
                Logger.error("Failed to upload '{}'".format(path), exc)
                continue

            progress.finish_target(name)
Example #24
File: gs.py Project: Faadal/dvc
    def download(self, path_infos, paths, no_progress_bar=False, names=None):
        names = self._verify_path_args(path_infos, paths, names)

        gs = self.gs

        for path, path_info, name in zip(paths, path_infos, names):
            if path_info['scheme'] != 'gs':
                raise NotImplementedError

            Logger.debug("Downloading '{}/{}' to '{}'".format(
                path_info['bucket'], path_info['key'], path))

            tmp_file = self.tmp_file(path)
            if not name:
                name = os.path.basename(path)

            if not no_progress_bar:
                # percent_cb is not available for download_to_filename, so
                # let's at least update progress at key points (start, finish)
                progress.update_target(name, 0, None)

            self._makedirs(path)

            try:
                gs.bucket(path_info['bucket']).get_blob(
                    path_info['key']).download_to_filename(tmp_file)
            except Exception as exc:
                Logger.error(
                    "Failed to download '{}/{}' to '{}'".format(
                        path_info['bucket'], path_info['key'], path), exc)
                continue

            os.rename(tmp_file, path)

            if not no_progress_bar:
                progress.finish_target(name)
Example #25
    def remove_from_cloud(self, aws_file_name):
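        # Delete the key from the S3 bucket, provided credentials are
        # configured and the key actually exists.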
        Logger.debug(
            u'[Cmd-Remove] Remove from cloud {}.'.format(aws_file_name))

        if not self.config.aws_access_key_id or not self.config.aws_secret_access_key:
            Logger.debug(
                '[Cmd-Remove] Unable to check cache file in the cloud')
            return
        conn = S3Connection(self.config.aws_access_key_id,
                            self.config.aws_secret_access_key)
        bucket_name = self.config.storage_bucket
        bucket = conn.lookup(bucket_name)
        if bucket:
            key = bucket.get_key(aws_file_name)
            if not key:
                Logger.warn(
                    '[Cmd-Remove] S3 remove warning: file "{}" does not exist in S3'
                    .format(aws_file_name))
            else:
                key.delete()
                Logger.info(
                    '[Cmd-Remove] File "{}" was removed from S3'.format(
                        aws_file_name))
Example #26
    def __init__(self, cmd_args, settings, stdout, stderr, shell=False):
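        # Run the command, then diff data-file timestamps taken before
        # and after it to identify the files the command modified.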
        self._settings = settings

        stemps_before = self.data_file_timesteps()

        Logger.debug(
            u'[Repository change] Exec command: {}. stdout={}, stderr={}, shell={}'
            .format(u' '.join(cmd_args), stdout, stderr, shell))
        Executor.exec_cmd_only_success(cmd_args, stdout, stderr, shell=shell)

        stemps_after = self.data_file_timesteps()

        sym_diff = stemps_after ^ stemps_before
        self._modified_content_filenames = set(
            [filename for filename, timestamp in sym_diff])

        Logger.debug(
            u'[Repository change] Identified modifications: {}'.format(
                u', '.join(self._modified_content_filenames)))

        self._stated_data_items = []
        self._externally_created_files = []
        self._created_status_files = []
        self._init_file_states()
Example #27
    def reproduce(self, changed_files):
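        # Reproduce this data item unless it is locked, already up to
        # date, or already reproduced earlier in this pass.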
        Logger.debug('Reproduce data item {}. recursive={}, force={}'.format(
            self._data_item.data.relative, self._recursive, self._force))

        if self.state.locked:
            Logger.debug('Data item {} is not reproducible'.format(
                self._data_item.data.relative))
            return False

        if self.is_repro_required(changed_files, self._data_item):
            if self._data_item.data.dvc not in changed_files:
                Logger.debug('Data item {} is going to be reproduced'.format(
                    self._data_item.data.relative))
                self.reproduce_data_item(changed_files)
                changed_files.add(self._data_item.data.dvc)
                return True
            else:
                msg = 'Data item {} is not going to be reproduced because it is already reproduced'
                Logger.debug(msg.format(self._data_item.data.relative))
        else:
            Logger.debug('Data item {} is up to date'.format(
                self._data_item.data.relative))
            return False
        return False
Example #28
    def reproduce_data_item(self, changed_files):
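        # Remove stale outputs first, then re-run the recorded command:
        # either a re-import or a regular run, per the state file.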
        Logger.debug('Reproducing data item {}.'.format(self._data_item.data.dvc))

        for output_dvc in self._state.output_files:
            Logger.debug('Removing output file {} before reproduction.'.format(output_dvc))

            try:
                data_item = self.cmd_obj.settings.path_factory.existing_data_item_from_dvc_path(output_dvc)
                os.remove(data_item.data.relative)
            except Exception as ex:
                msg = 'Data item {} cannot be removed before reproduction: {}'
                Logger.error(msg.format(output_dvc, ex))

            changed_files.add(output_dvc)

        if self.state.is_import_file:
            Logger.debug('Reproducing data item {}. Re-import cmd: {}'.format(
                self._data_item.data.relative, ' '.join(self.state.argv)))

            if len(self.state.argv) != 2:
                msg = 'Data item "{}" cannot be re-imported: expected 2 arguments but got {}. Argv: {}'
                raise ReproError(msg.format(self._data_item.data.relative, len(self.state.argv), self.state.argv))

            input = self.state.argv[0]
            output = self.state.argv[1]

            cmd = CmdImportFile(self._settings)
            cmd.set_git_action(True)
            cmd.set_locker(False)

            Logger.info(u'Reproducing import command: {}'.format(output))
            if cmd.import_and_commit_if_needed(input, output, lock=True, check_if_ready=False) != 0:
                raise ReproError('Import command reproduction failed')
            return True
        elif self.state.is_run:
            cmd = CmdRun(self._settings)
            cmd.set_git_action(True)
            cmd.set_locker(False)

            Logger.info('Reproducing run command for data item {}. Args: {}'.format(
                self._data_item.data.relative, ' '.join(self.state.argv)))

            data_items_from_args, not_data_items_from_args = self.cmd_obj.argv_files_by_type(self.state.argv)
            if cmd.run_and_commit_if_needed(self.state.argv,
                                            data_items_from_args,
                                            not_data_items_from_args,
                                            self.state.stdout,
                                            self.state.stderr,
                                            self.state.shell,
                                            check_if_ready=False) != 0:
                raise ReproError('Run command reproduction failed')
            return True
        else:
            # Ignore EMPTY_FILE command
            pass
Example #29
File: updater.py Project: roysh/dvc
    def check(self):
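        # Same throttled update check as above, with version fetching
        # and comparison factored out into helper methods.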
        if os.getenv('CI'):
            return

        if os.path.isfile(self.updater_file):
            ctime = os.path.getctime(self.updater_file)
            if time.time() - ctime < self.TIMEOUT:
                msg = '{} is not old enough to check for updates'
                Logger.debug(msg.format(self.UPDATER_FILE))
                return

            os.unlink(self.updater_file)

        Logger.info('Checking for updates...')

        try:
            self._get_latest_version()
        except Exception as exc:
            msg = 'Failed to obtain latest version: {}'.format(str(exc))
            Logger.debug(msg)
            return

        if self._is_outdated():
            self._notify()
Example #30
    def link(self, cache, path):
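        # Link a cache file into the workspace; an empty cache becomes an
        # empty file, and protected mode makes the result read-only.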
        assert os.path.isfile(cache)

        dname = os.path.dirname(path)
        if not os.path.exists(dname):
            os.makedirs(dname)

        # NOTE: just create an empty file for an empty cache
        if os.path.getsize(cache) == 0:
            open(path, 'w+').close()
            msg = "Created empty file: {} -> {}"
            self.project.logger.info(
                msg.format(os.path.relpath(cache), os.path.relpath(path)))
            return

        i = len(self.cache_types)
        while i > 0:
            try:
                self.CACHE_TYPE_MAP[self.cache_types[0]](cache, path)

                if self.protected:
                    os.chmod(path, stat.S_IREAD | stat.S_IRGRP | stat.S_IROTH)

                msg = "Created {}'{}': {} -> {}"
                Logger.info(
                    msg.format('protected ' if self.protected else '',
                               self.cache_types[0], os.path.relpath(cache),
                               os.path.relpath(path)))
                return
            except DvcException as exc:
                msg = 'Cache type \'{}\' is not supported: {}'
                Logger.debug(msg.format(self.cache_types[0], str(exc)))
                del self.cache_types[0]
                i -= 1

        raise DvcException('No possible cache types left to try out.')