Example #1
    def collect_dir_cache(self, dname):
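        """Checksum every file under dname, with a progress bar for large
        directories, and return (dir_md5, dir_info)."""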
        dir_info = []

        db = self.state.load()
        bar = False
        for root, dirs, files in os.walk(dname):
            if len(files) > LARGE_DIR_SIZE:
                msg = "Computing md5 for a large directory {}. " \
                      "This is only done once."
                Logger.info(msg.format(os.path.relpath(dname)))
                bar = True
                title = os.path.relpath(dname)
                processed = 0
                total = len(files)
                progress.update_target(title, 0, total)

            for fname in files:
                path = os.path.join(root, fname)
                relpath = self.unixpath(os.path.relpath(path, dname))

                if bar:
                    progress.update_target(title, processed, total)
                    processed += 1

                md5 = self.state.update(path, use_db=db)
                dir_info.append({self.PARAM_RELPATH: relpath,
                                 self.PARAM_MD5: md5})

        db.commit()
        db.close()

        if bar:
            progress.finish_target(title)

        # NOTE: sorting the list by path to ensure reproducibility
        dir_info = sorted(dir_info, key=itemgetter(self.PARAM_RELPATH))

        md5 = dict_md5(dir_info) + self.MD5_DIR_SUFFIX
        if self.changed_cache(md5):
            self.dump_dir_cache(md5, dir_info)

        return (md5, dir_info)
Example #2
    def run(self):
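        """Initialize DVC: create the config, data, cache and state
        directories, write the config file and commit the result."""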
        if not self.no_git_actions and not self.git.is_ready_to_go():
            return 1

        if os.path.realpath(os.path.curdir) != self.settings.git.git_dir_abs:
            Logger.error(
                'DVC error: initialization can only be done from the git '
                'root directory {}'.format(self.settings.git.git_dir_abs))
            return 1

        config_dir_path = self.get_not_existing_path(Config.CONFIG_DIR)
        data_dir_path = self.get_not_existing_path(self.parsed_args.data_dir)
        cache_dir_path = self.get_not_existing_path(Config.CONFIG_DIR,
                                                    Config.CACHE_DIR)
        state_dir_path = self.get_not_existing_path(Config.CONFIG_DIR,
                                                    Config.STATE_DIR)

        self.settings.config.set(self.parsed_args.data_dir)

        conf_file_name = self.get_not_existing_conf_file_name()

        config_dir_path.mkdir()
        data_dir_path.mkdir()
        cache_dir_path.mkdir()
        state_dir_path.mkdir()
        Logger.info('Directories {}/, {}/, {}/, {}/ were created'.format(
            config_dir_path.name, data_dir_path.name, cache_dir_path.name,
            state_dir_path.name))

        self.create_empty_file()

        with open(conf_file_name, 'wt') as conf_file:
            conf_file.write(self.CONFIG_TEMPLATE.format(data_dir_path.name))

        message = 'DVC init. Data dir {}, cache dir {}, state dir {}'.format(
            data_dir_path.name, cache_dir_path.name, state_dir_path.name)
        if self.commit_if_needed(message) == 1:
            return 1

        self.modify_gitignore(config_dir_path.name, cache_dir_path.name)
        return self.commit_if_needed('DVC init. Commit .gitignore file')
Example #3
    def link(self, cache, path):
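        """Link the cache file to path, trying the remaining cache types
        in order until one succeeds."""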
        assert os.path.isfile(cache)

        dname = os.path.dirname(path)
        if not os.path.exists(dname):
            os.makedirs(dname)

        while self.cache_types:
            typ = self.cache_types[0]
            try:
                self.CACHE_TYPE_MAP[typ](cache, path)
                return
            except Exception:
                msg = 'Cache type \'{}\' is not supported'.format(typ)
                Logger.debug(msg)
                del self.cache_types[0]

        raise DvcException('No possible cache types left to try out.')
Example #4
    def _resume_multipart(self, key, fname):
        """
        Try resuming multipart upload.
        """
        try:
            with open(self._upload_tracker(fname), 'r') as fd:
                mp_id = fd.read()
        except Exception as exc:
            Logger.debug(
                "Failed to read upload tracker file for {}: {}".format(
                    fname, exc))
            return None

        for part in key.bucket.get_all_multipart_uploads():
            if part.id != mp_id:
                continue

            Logger.debug("Found existing multipart {}".format(mp_id))
            return part

        return None
Example #5
    def check_opt(self):
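        """Validate the 'section.option' name from the command line and
        store the resolved section and option."""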
        _section, _opt = self.parsed_args.name.strip().split('.', 1)
        add = (self.parsed_args.value is not None
               and not self.parsed_args.unset)

        section = self._get_key(self.configobj, _section, add)

        if not section:
            Logger.error('Invalid config section {}'.format(_section))
            return 1

        opt = self._get_key(self.configobj[section], _opt, add)
        if not opt:
            Logger.error('Invalid option name {}'.format(_opt))
            return 1

        self.section = section
        self.opt = opt

        return 0
Example #6
    def run(self):
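        """Create a cloud instance with the given name."""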
        with DvcLock(self.is_locker, self.git):
            name = self.parsed_args.name
            cloud = self.parsed_args.cloud or self.settings.config.cloud

            if not name:
                Logger.error('Instance name is not defined')
                return 1

            try:
                InstanceManager().create(name, cloud, self.parsed_args,
                                         self.settings.config)
            except DvcException as ex:
                Logger.error('Instance creation error: {}'.format(ex))
                return 1

            return 0
Example #7
    def load_dir_cache(path):
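        """Load a dir cache file; return its entries, or an empty list if
        the file is missing or malformed."""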
        if os.path.isabs(path):
            relpath = os.path.relpath(path)
        else:
            relpath = path

        try:
            with open(path, 'r') as fd:
                d = json.load(fd)
        except Exception as exc:
            msg = u'Failed to load dir cache \'{}\''
            Logger.error(msg.format(relpath), exc)
            return []

        if not isinstance(d, list):
            msg = u'Dir cache file format error \'{}\': skipping the file'
            Logger.error(msg.format(relpath))
            return []

        return d
Example #8
    def upload(self, paths, path_infos, names=None):
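        """Upload files to the local destinations described by path_infos."""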
        names = self._verify_path_args(path_infos, paths, names)

        for path, path_info, name in zip(paths, path_infos, names):
            if path_info['scheme'] != 'local':
                raise NotImplementedError

            Logger.debug("Uploading '{}' to '{}'".format(
                path, path_info['path']))

            if not name:
                name = os.path.basename(path)

            self._makedirs(path_info['path'])

            try:
                copyfile(path, path_info['path'], name=name)
            except Exception as exc:
                Logger.error("Failed to upload '{}' tp '{}'".format(
                    path, path_info['path']))
Example #9
    def link(self, src, link):
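        """Link src to link with the configured cache type, or try every
        known type until one succeeds."""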
        dname = os.path.dirname(link)
        if not os.path.exists(dname):
            os.makedirs(dname)

        if self.cache_type is not None:
            types = [self.cache_type]
        else:
            types = self.CACHE_TYPES

        for typ in types:
            try:
                self.CACHE_TYPE_MAP[typ](src, link)
                self.link_state.update(link)
                return
            except Exception as exc:
                msg = 'Cache type \'{}\' is not supported'.format(typ)
                Logger.debug(msg)
                if typ == types[-1]:
                    raise DvcException(msg, cause=exc)
Example #10
    def ignore(self, path):
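        """Add an entry for path to the corresponding .gitignore, unless
        it is already listed there."""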
        entry, gitignore = self._get_gitignore(path)

        ignore_list = []
        if os.path.exists(gitignore):
            with open(gitignore, 'r') as fobj:
                ignore_list = fobj.readlines()
            filtered = list(
                filter(lambda x: x.strip() == entry.strip(), ignore_list))
            if len(filtered) != 0:
                return

        msg = "Adding '{}' to '{}'.".format(os.path.relpath(path),
                                            os.path.relpath(gitignore))
        Logger.info(msg)

        content = entry
        if len(ignore_list) > 0:
            content = '\n' + content

        with open(gitignore, 'a') as fobj:
            fobj.write(content)
Example #11
    def __init__(self, root_dir):
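        """Instantiate the project-level objects (config, SCM, lock, state,
        cache, cloud, updater) rooted at root_dir."""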
        self.root_dir = os.path.abspath(os.path.realpath(root_dir))
        self.dvc_dir = os.path.join(self.root_dir, self.DVC_DIR)

        self.config = Config(self.dvc_dir)
        self.scm = SCM(self.root_dir)
        self.lock = Lock(self.dvc_dir)
        # NOTE: storing state and link_state in the repository itself to avoid
        # any possible state corruption in 'shared cache dir' scenario.
        self.state = State(self)
        self.link_state = LinkState(self)
        self.logger = Logger(self.config._config[Config.SECTION_CORE].get(
            Config.SECTION_CORE_LOGLEVEL, None))
        self.cache = Cache(self)
        self.cloud = DataCloud(self, config=self.config._config)
        self.updater = Updater(self.dvc_dir)

        self._ignore()

        self.updater.check()
Example #12
    def download(self, path_infos, fnames, no_progress_bar=False, names=None):
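        """Download files from S3, with an optional progress bar."""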
        names = self._verify_path_args(path_infos, fnames, names)

        session = boto3.session.Session()
        s3 = session.client('s3')

        for fname, path_info, name in zip(fnames, path_infos, names):
            if path_info['scheme'] != 's3':
                raise NotImplementedError

            Logger.debug("Downloading '{}/{}' to '{}'".format(
                path_info['bucket'], path_info['key'], fname))

            tmp_file = self.tmp_file(fname)
            if not name:
                name = os.path.basename(fname)

            if no_progress_bar:
                cb = None
            else:
                total = s3.head_object(Bucket=path_info['bucket'],
                                       Key=path_info['key'])['ContentLength']
                cb = Callback(name, total)

            self._makedirs(fname)

            try:
                s3.download_file(path_info['bucket'],
                                 path_info['key'],
                                 tmp_file,
                                 Callback=cb)
            except Exception as exc:
                Logger.error(
                    "Failed to download '{}/{}'".format(
                        path_info['bucket'], path_info['key']), exc)
                return

            os.rename(tmp_file, fname)

            if not no_progress_bar:
                progress.finish_target(name)
Example #13
    def download(self,
                 from_infos,
                 to_infos,
                 no_progress_bar=False,
                 names=None):
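        """Download Azure blobs to local paths, renaming a temporary file
        into place on success."""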
        names = self._verify_path_args(from_infos, to_infos, names)

        for to_info, from_info, name in zip(to_infos, from_infos, names):
            if from_info['scheme'] != self.scheme:
                raise NotImplementedError

            if to_info['scheme'] != 'local':
                raise NotImplementedError

            bucket = from_info['bucket']
            key = from_info['key']

            Logger.debug("Downloading '{}/{}' to '{}'".format(
                bucket, key, to_info['path']))

            tmp_file = self.tmp_file(to_info['path'])
            if not name:
                name = os.path.basename(to_info['path'])

            cb = None if no_progress_bar else Callback(name)

            self._makedirs(to_info['path'])

            try:
                self.blob_service.get_blob_to_path(bucket,
                                                   key,
                                                   tmp_file,
                                                   progress_callback=cb)
            except Exception as exc:
                msg = "Failed to download '{}/{}'".format(bucket, key)
                Logger.warn(msg, exc)
            else:
                os.rename(tmp_file, to_info['path'])

                if not no_progress_bar:
                    progress.finish_target(name)
Example #14
    def reproduce(self, changed_files):
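        """Reproduce the data item if needed; return True if it was
        reproduced and False otherwise."""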
        Logger.debug('Reproduce data item {}. recursive={}, force={}'.format(
            self._data_item.data.relative, self._recursive, self._force))

        if self.state.locked:
            Logger.debug('Data item {} is not reproducible'.format(self._data_item.data.relative))
            return False

        if self.is_repro_required(changed_files, self._data_item):
            if self._data_item.data.dvc not in changed_files:
                Logger.debug('Data item {} is going to be reproduced'.format(self._data_item.data.relative))
                self.reproduce_data_item(changed_files)
                changed_files.add(self._data_item.data.dvc)
                return True
            else:
                msg = 'Data item {} is not going to be reproduced because it is already reproduced'
                Logger.debug(msg.format(self._data_item.data.relative))
        else:
            Logger.debug('Data item {} is up to date'.format(self._data_item.data.relative))
            return False
        return False
Example #15
    def _import(self, target):
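        """Import a (url, item) target with the cloud backend that matches
        the url scheme."""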
        url, item = target
        o = urlparse(url)

        typ = self.SCHEME_MAP.get(o.scheme, None)
        if typ is None:
            Logger.error('Not supported scheme \'{}\''.format(o.scheme))
            return None

        # To handle the ConfigI case
        if not hasattr(self._settings.config, '_config'):
            self._config = None
            cloud_settings = None
        else:
            self._config = self._settings.config._config
            cloud_settings = self.get_cloud_settings(
                self._config, typ, self._settings.path_factory)

        cloud = self.CLOUD_MAP[typ](cloud_settings)

        return cloud.import_data(url, item)
Example #16
    def _sync_to_cloud_aws(self, data_item):
        """ sync_to_cloud, aws version """

        aws_key = self.cache_file_key(data_item.resolved_cache.dvc)
        bucket = self._get_bucket_aws()
        key = bucket.get_key(aws_key)
        if key:
            Logger.debug('File already uploaded to the cloud. Checksum validation...')

            md5_cloud = key.etag[1:-1]
            md5_local = file_md5(data_item.resolved_cache.dvc)[0]
            if md5_cloud == md5_local:
                Logger.debug('File checksum matches. No uploading is needed.')
                return

            Logger.debug('Checksum mismatch. Re-uploading is required.')

        Logger.info('Uploading cache file "{}" to S3 "{}"'.format(data_item.resolved_cache.relative, aws_key))
        key = bucket.new_key(aws_key)
        key.set_contents_from_filename(data_item.resolved_cache.relative, cb=percent_cb)
        Logger.info('Uploading completed')
Example #17
    def _read_metric_from_state_file(self, hash, target, settings):
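        """Read the metric for target from its state file at the given git
        revision."""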
        try:
            data_item = settings.path_factory.data_item(target)
        except DataItemError as ex:
            Logger.warn('Target file {} is not data item: {}'.format(
                target, ex))
            return None

        try:
            cmd_corresponded_state_file = [
                'git', 'show', '{}:{}'.format(hash, data_item.state.relative)
            ]
            state_file_content = Executor.exec_cmd_only_success(
                cmd_corresponded_state_file)
        except ExecutorError as ex:
            msg = '[dvc-git] Cannot obtain content of target symbolic file {} with hash {}: {}'
            Logger.warn(msg.format(target, hash, ex))
            return None

        state_file = StateFile.loads(state_file_content, settings)
        return state_file.single_target_metric
Example #18
    def run_command(self,
                    cmd_args,
                    data_items_from_args,
                    not_data_items_from_args,
                    stdout=None,
                    stderr=None,
                    shell=False):
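        """Run cmd_args, move changed outputs to cache and create a state
        file for each changed data item."""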
        Logger.debug(
            'Run command with args: {}. Data items from args: {}. stdout={}, stderr={}, shell={}'
            .format(' '.join(cmd_args),
                    ', '.join([x.data.dvc for x in data_items_from_args]),
                    stdout, stderr, shell))

        repo_change = RepositoryChange(cmd_args,
                                       self.settings,
                                       stdout,
                                       stderr,
                                       shell=shell)

        if not self.no_git_actions and not self._validate_file_states(
                repo_change):
            self.remove_new_files(repo_change)
            raise RunError('Errors occurred.')

        output_set = set(self.declaration_output_data_items +
                         repo_change.changed_data_items)
        output_files_dvc = [x.data.dvc for x in output_set]

        input_set = set(data_items_from_args +
                        self.declaration_input_data_items) - output_set
        input_files_dvc = [x.data.dvc for x in input_set]

        code_dependencies_dvc = self.git.abs_paths_to_dvc(
            self.code_dependencies + not_data_items_from_args)

        result = []
        for data_item in repo_change.changed_data_items:
            Logger.debug(
                'Move output file "{}" to cache dir "{}" and create a symlink'.
                format(data_item.data.relative, data_item.cache.relative))
            data_item.move_data_to_cache()

            Logger.debug('Create state file "{}"'.format(
                data_item.state.relative))

            state_file = StateFile(StateFile.COMMAND_RUN,
                                   data_item.state.relative,
                                   self.settings,
                                   input_files_dvc,
                                   output_files_dvc,
                                   code_dependencies_dvc,
                                   argv=cmd_args,
                                   lock=self.lock,
                                   stdout=self._stdout_to_dvc(stdout),
                                   stderr=self._stdout_to_dvc(stderr),
                                   shell=shell)
            state_file.save()
            result.append(state_file)

        return result
Example #19
    def checkout(self, path_info, checksum_info):
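        """Check out a file or directory from cache by its checksum."""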
        path = path_info['path']
        md5 = checksum_info.get(self.PARAM_MD5, None)
        cache = self.get(md5)

        if not cache:
            Logger.warn('No cache info for \'{}\'. Skipping checkout.'.format(
                os.path.relpath(path)))
            return

        if os.path.exists(path):
            msg = u'Data \'{}\' exists. Removing before checkout'
            Logger.debug(msg.format(os.path.relpath(path)))
            remove(path)

        msg = u'Checking out \'{}\' with cache \'{}\''
        Logger.debug(msg.format(os.path.relpath(path), md5))

        if not self.is_dir_cache(cache):
            self.link(md5, path, dump=True)
            return

        # Create dir separately so that dir is created
        # even if there are no files in it
        if not os.path.exists(path):
            os.makedirs(path)

        for entry in self.load_dir_cache(cache):
            md5 = entry[self.PARAM_MD5]
            relpath = entry[self.PARAM_RELPATH]
            p = os.path.join(path, relpath)
            self.link(md5, p, dump=False)
        self.link_state.dump()
Example #20
    def create_state_files(self, targets, lock):
        """
        Create state files for all targets.
        """
        for t in targets:
            orig_target, processed_data_item = t
            input, data_item = orig_target
            output = data_item.data.relative

            if processed_data_item is None:
                Logger.debug(
                    'Skipping creating state file for failed import {}'.format(
                        data_item.state.relative))
                continue

            Logger.debug('Creating symlink {} --> {}'.format(
                data_item.symlink_file, data_item.data.relative))
            System.symlink(data_item.symlink_file, data_item.data.relative)

            state_file = StateFile(StateFile.COMMAND_IMPORT_FILE,
                                   data_item,
                                   self.settings,
                                   argv=[input, output],
                                   input_files=[],
                                   output_files=[output],
                                   lock=lock)
            state_file.save()
            Logger.debug('State file "{}" was created'.format(
                data_item.state.relative))
Example #21
    def terminate_instances(self, instance):
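        """Terminate an instance (or all of them) and, if necessary,
        promote another instance to active."""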
        instance = self.get_instance_id(instance)

        if not instance:
            Logger.error('Instance Id is not specified')
            return

        (active, not_active, rest_inst) = self.all_instances()

        if instance == 'all':
            target_in_active = active
            target_in_not_active = not_active
        else:
            target_in_active = list(
                filter(lambda inst: inst.id == instance, active))
            target_in_not_active = list(
                filter(lambda inst: inst.id == instance, not_active))

        if target_in_not_active:
            self._conn.terminate_instances(
                instance_ids=[inst.id for inst in target_in_not_active])

        for inst in target_in_active:
            inst.remove_tag(self.INSTANCE_STATE_TAG, 'True')
            inst.add_tag(self.INSTANCE_STATE_TAG, 'False')
            self._conn.terminate_instances(instance_ids=[inst.id])

        if instance != 'all' and len(target_in_active) > 0 and len(
                not_active) > 0:
            new_active_inst = not_active[0]
            new_active_inst.remove_tag(self.INSTANCE_STATE_TAG, 'False')
            new_active_inst.add_tag(self.INSTANCE_STATE_TAG, 'True')
            randomly = ''
            if len(not_active) > 1:
                randomly = 'randomly '
            Logger.error(
                '{} instance {} was {}selected as active because an active '
                'instance was terminated'.format(new_active_inst.instance_type,
                                                 new_active_inst.id, randomly))
Example #22
    def _checkout(self, path, md5):
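        """Check out path from the cache entry for md5, removing any
        existing data first."""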
        cache = self.get(md5)

        if not cache or not os.path.exists(cache) or self._changed(md5):
            if cache:
                Logger.warn(u'\'{}({})\': cache file not found'.format(
                    os.path.relpath(cache), os.path.relpath(path)))
            remove(path)
            return

        if os.path.exists(path):
            msg = u'Data \'{}\' exists. Removing before checkout'
            Logger.debug(msg.format(os.path.relpath(path)))
            remove(path)

        msg = u'Checking out \'{}\' with cache \'{}\''
        Logger.debug(msg.format(os.path.relpath(path), os.path.relpath(cache)))

        if not self.is_dir_cache(cache):
            self.link(cache, path)
            return

        dir_cache = self.dir_cache(cache)
        for relpath, c in dir_cache.items():
            p = os.path.join(path, relpath)
            self.link(c, p)
Example #23
    def read_metrics(self, fname, branch):
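        """Read the target metric for fname from the given git branch."""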
        try:
            lines = self.git.get_file_content(fname, branch).split('\n')
        except ExecutorError as ex:
            msg = 'Unable to read metrics file from branch {}: {}'
            data_item = self.settings.path_factory.data_item(fname)

            try:
                self.git.get_file_content(data_item.state.relative, branch)
                Logger.error(
                    msg.format(branch, 'this is a data file, not a metric file'))
            except ExecutorError:
                Logger.error(
                    msg.format(branch, 'file does not exist in this branch'))
                return None

            return None

        metric = utils.parse_target_metric(lines)
        if not metric:
            msg = 'Unable to parse metrics from the first line of file {} in branch {}'
            Logger.error(msg.format(fname, branch))
            return None

        return metric
Example #24
    def check(self):
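        """Check for a newer DVC release at most once per TIMEOUT and warn
        if an upgrade is available."""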
        current = VERSION_BASE

        if os.getenv('CI'):
            return

        if os.path.isfile(self.updater_file):
            ctime = os.path.getctime(self.updater_file)
            if time.time() - ctime < self.TIMEOUT:
                msg = '{} is not old enough to check for updates'
                Logger.debug(msg.format(self.UPDATER_FILE))
                return

            os.unlink(self.updater_file)

        try:
            r = requests.get(self.URL)
            j = r.json()
            latest = j['version']
            open(self.updater_file, 'w+').close()
        except Exception as exc:
            Logger.debug('Failed to obtain latest version: {}'.format(
                str(exc)))
            return

        # NOTE: compare as integer tuples so that, e.g., 0.10.0 > 0.9.9
        l_major, l_minor, l_patch = [int(x) for x in latest.split('.')]
        c_major, c_minor, c_patch = [int(x) for x in current.split('.')]

        if (l_major, l_minor, l_patch) <= (c_major, c_minor, c_patch):
            return

        msg = 'You are using dvc version {}, however version {} is available. Consider upgrading.'
        Logger.warn(msg.format(current, latest))
Example #25
    def push(self, data_item):
        """ push, gcp version """

        bucket = self._get_bucket_gc(self.storage_bucket)
        blob_name = self.cache_file_key(data_item.resolved_cache.dvc)
        name = os.path.basename(data_item.resolved_cache.dvc)

        blob = bucket.get_blob(blob_name)
        if blob is not None and blob.exists():
            if self._cmp_checksum(blob, data_item.resolved_cache.dvc):
                Logger.debug('checksum %s matches.  Skipping upload' %
                             data_item.cache.relative)
                return data_item
            Logger.debug('checksum %s mismatch.  re-uploading' %
                         data_item.cache.relative)

        # same as in _import
        progress.update_target(name, 0, None)

        blob = bucket.blob(blob_name)
        blob.upload_from_filename(data_item.resolved_cache.relative)

        progress.finish_target(name)
        Logger.debug('uploading %s completed' %
                     data_item.resolved_cache.relative)

        return data_item
Example #26
    def run(self):
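        """Synchronize the given targets, holding the DVC lock while
        working."""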
        if self.is_locker:
            lock = fasteners.InterProcessLock(self.git.lock_file)
            gotten = lock.acquire(timeout=5)
            if not gotten:
                Logger.info('Cannot perform the cmd since DVC is busy and locked. Please retry the cmd later.')
                return 1

        good = self.config.sanity_check()

        if not good[0]:
            Logger.error('config \'%s\' is not correctly set up. Please fix:' % Runtime.CONFIG)
            for e in good[1]:
                Logger.error('    ' + e)
            return 1

        try:
            for target in self.parsed_args.targets:
                data_item = self.settings.path_factory.data_item(target)
                if System.islink(target):
                    self.sync_symlink(data_item)
                elif os.path.isdir(target):
                    self.sync_dir(target)
                else:
                    raise DataSyncError('File "{}" does not exist'.format(target))
        finally:
            if self.is_locker:
                lock.release()
Example #27
    def checkout(self, path_info, checksum_info, dump=True):
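        """Check out a file or directory from cache by the checksum
        recorded in checksum_info."""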
        path = path_info['path']
        md5 = checksum_info.get(self.PARAM_MD5, None)
        cache = self.get(md5)

        if not cache or not os.path.exists(cache) or self.changed(md5):
            if cache:
                Logger.warn(u'\'{}({})\': cache file not found'.format(
                    os.path.relpath(cache), os.path.relpath(path)))
            remove(path)
            return

        if os.path.exists(path):
            msg = u'Data \'{}\' exists. Removing before checkout'
            Logger.debug(msg.format(os.path.relpath(path)))
            remove(path)

        if not self.is_dir_cache(cache):
            self.link(cache, path, dump=dump)
            return

        msg = u'Checking out directory \'{}\' with cache \'{}\''
        Logger.debug(msg.format(os.path.relpath(path), os.path.relpath(cache)))

        # Create dir separately so that dir is created
        # even if there are no files in it
        if not os.path.exists(path):
            os.makedirs(path)

        dir_cache = self.dir_cache(cache)
        for relpath, c in dir_cache.items():
            p = os.path.join(path, relpath)
            self.link(c, p, dump=dump)
Example #28
    def upload(self, from_infos, to_infos, names=None):
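        """Upload local files to a remote host over SFTP."""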
        names = self._verify_path_args(to_infos, from_infos, names)

        ssh = self.ssh(host=to_infos[0]['host'],
                       user=to_infos[0]['user'],
                       port=to_infos[0]['port'])
        sftp = ssh.open_sftp()

        for from_info, to_info, name in zip(from_infos, to_infos, names):
            if to_info['scheme'] != 'ssh':
                raise NotImplementedError

            if from_info['scheme'] != 'local':
                raise NotImplementedError

            Logger.debug("Uploading '{}' to '{}/{}'".format(from_info['path'],
                                                            to_info['host'],
                                                            to_info['path']))

            if not name:
                name = os.path.basename(from_info['path'])

            dname = posixpath.dirname(to_info['path'])
            self._exec(ssh, 'mkdir -p {}'.format(dname))

            try:
                sftp.put(from_info['path'],
                         to_info['path'],
                         callback=create_cb(name))
            except Exception as exc:
                msg = "Failed to upload '{}' to '{}/{}'"
                Logger.error(msg.format(from_info['path'],
                                        to_info['host'],
                                        to_info['path'], exc))
                continue

            progress.finish_target(name)

        sftp.close()
        ssh.close()
Example #29
    def load_dir_cache(self, md5):
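        """Load the dir cache file for md5 and convert entry paths to the
        local OS convention."""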
        path = self.get(md5)

        assert self.is_dir_cache(path)

        try:
            with open(path, 'r') as fd:
                d = json.load(fd)
        except Exception as exc:
            msg = u'Failed to load dir cache \'{}\''
            Logger.error(msg.format(os.path.relpath(path)), exc)
            return []

        if not isinstance(d, list):
            msg = u'Dir cache file format error \'{}\': skipping the file'
            Logger.error(msg.format(os.path.relpath(path)))
            return []

        for info in d:
            info['relpath'] = self.ospath(info['relpath'])

        return d
Example #30
    def _pull_key(self, key, fname, no_progress_bar=False):
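        """Download an S3 key to fname with resume support, skipping the
        download if checksums already match."""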
        Logger.debug("Pulling key '{}' from bucket '{}' to file '{}'".format(
            key.name, key.bucket.name, fname))
        self._makedirs(fname)

        tmp_file = self.tmp_file(fname)
        name = os.path.relpath(fname, self._cloud_settings.cache.cache_dir)

        if self._cmp_checksum(key, fname):
            Logger.debug('File "{}" matches with "{}".'.format(
                fname, key.name))
            return fname

        Logger.debug('Downloading cache file from S3 "{}/{}" to "{}"'.format(
            key.bucket.name, key.name, fname))

        if no_progress_bar:
            cb = None
        else:
            cb = create_cb(name)

        res_h = ResumableDownloadHandler(
            tracker_file_name=self._download_tracker(tmp_file), num_retries=10)
        try:
            key.get_contents_to_filename(tmp_file,
                                         cb=cb,
                                         res_download_handler=res_h)
        except Exception as exc:
            Logger.error('Failed to download "{}": {}'.format(key.name, exc))
            return None

        os.rename(tmp_file, fname)

        if not no_progress_bar:
            progress.finish_target(name)

        Logger.debug('Downloading completed')

        return fname