Ejemplo n.º 1
0
    def _setup_state(self, update_collection):
        """
        Create a new cache file or load a previously existing one.

        When update_collection is given it must match
        arvados.util.collection_uuid_pattern; the collection is then loaded
        into self._remote_collection and self.update is set to True.

        When self.use_cache is set, a cache file named after the MD5 digest
        of the API host, the sorted real input paths and self.filename (if
        set) is opened and handed to self._lock_file(); its JSON content
        becomes self._state. Otherwise, or when the cache content cannot be
        parsed or lacks the 'manifest'/'files' keys, self._state starts as a
        deep copy of self.EMPTY_STATE. Finally self._local_collection is
        built from self._state['manifest'].

        Raises CollectionUpdateError if loading update_collection fails with
        an ApiError, or if it does not match the expected locator pattern.
        """
        # Load an already existing collection for update
        if update_collection and re.match(arvados.util.collection_uuid_pattern,
                                          update_collection):
            try:
                self._remote_collection = arvados.collection.Collection(update_collection)
            except arvados.errors.ApiError as error:
                raise CollectionUpdateError("Cannot read collection {} ({})".format(update_collection, error))
            else:
                # Only flag update mode once the remote collection loaded fine.
                self.update = True
        elif update_collection:
            # Collection locator provided, but unknown format
            raise CollectionUpdateError("Collection locator unknown: '{}'".format(update_collection))

        if self.use_cache:
            # Set up cache file name from input paths.
            md5 = hashlib.md5()
            md5.update(arvados.config.get('ARVADOS_API_HOST', '!nohost').encode())
            # Sort the resolved paths so the digest does not depend on the
            # order in which the inputs were given.
            realpaths = sorted(os.path.realpath(path) for path in self.paths)
            md5.update(b'\0'.join([p.encode() for p in realpaths]))
            if self.filename:
                md5.update(self.filename.encode())
            cache_filename = md5.hexdigest()
            cache_filepath = os.path.join(
                arv_cmd.make_home_conf_dir(self.CACHE_DIR, 0o700, 'raise'),
                cache_filename)
            if self.resume and os.path.exists(cache_filepath):
                self.logger.info("Resuming upload from cache file {}".format(cache_filepath))
                # 'a+' keeps the existing content (no truncation).
                self._cache_file = open(cache_filepath, 'a+')
            else:
                # --no-resume means start with an empty cache file.
                self.logger.info("Creating new cache file at {}".format(cache_filepath))
                # 'w+' truncates any previous content.
                self._cache_file = open(cache_filepath, 'w+')
            self._cache_filename = self._cache_file.name
            self._lock_file(self._cache_file)
            # Rewind: a file opened with 'a+' is positioned at its end, and
            # the JSON load below must read from the start.
            self._cache_file.seek(0)

        with self._state_lock:
            if self.use_cache:
                try:
                    self._state = json.load(self._cache_file)
                    if not set(['manifest', 'files']).issubset(set(self._state.keys())):
                        # Cache at least partially incomplete, set up new cache
                        self._state = copy.deepcopy(self.EMPTY_STATE)
                except ValueError:
                    # Cache file empty or not valid JSON, set up new cache
                    self._state = copy.deepcopy(self.EMPTY_STATE)
            else:
                self.logger.info("No cache usage requested for this run.")
                # No cache file, set empty state
                self._state = copy.deepcopy(self.EMPTY_STATE)
            # Load the previous manifest so we can check if files were modified remotely.
            self._local_collection = arvados.collection.Collection(self._state['manifest'], replication_desired=self.replication_desired, put_threads=self.put_threads)
Ejemplo n.º 2
0
 def make_path(cls, args):
     """Build the cache file path for the given arguments object.

     The file name is the MD5 hex digest of the configured API host, the
     sorted real paths of ``args.paths`` and then either the manifest depth
     (when any path is a directory) or ``args.filename`` (when set).  The
     directory is whatever
     ``arv_cmd.make_home_conf_dir(cls.CACHE_DIR, 0o700, 'raise')`` returns.

     Text values are encoded to bytes before hashing because ``hashlib``
     rejects ``str`` input on Python 3; values that are already bytes are
     hashed unchanged, so byte-string inputs keep producing the same digest.
     """
     def as_bytes(value):
         # Pass bytes through untouched; encode text with the default codec.
         return value if isinstance(value, bytes) else value.encode()

     md5 = hashlib.md5()
     md5.update(as_bytes(arvados.config.get('ARVADOS_API_HOST', '!nohost')))
     # Sort the resolved paths so argument order does not change the digest.
     realpaths = sorted(os.path.realpath(path) for path in args.paths)
     md5.update(b'\0'.join(as_bytes(path) for path in realpaths))
     if any(os.path.isdir(path) for path in realpaths):
         # Depths below -1 are clamped to -1 before being hashed.
         md5.update(as_bytes(str(max(args.max_manifest_depth, -1))))
     elif args.filename:
         md5.update(as_bytes(args.filename))
     return os.path.join(
         arv_cmd.make_home_conf_dir(cls.CACHE_DIR, 0o700, 'raise'),
         md5.hexdigest())
Ejemplo n.º 3
0
 def make_path(cls, args):
     """Build the cache file path for the given arguments object.

     The file name is the MD5 hex digest of the configured API host, the
     sorted real paths of ``args.paths`` and then either the manifest depth
     (when any path is a directory) or ``args.filename`` (when set).  The
     directory is whatever
     ``arv_cmd.make_home_conf_dir(cls.CACHE_DIR, 0o700, 'raise')`` returns.

     Text values are encoded to bytes before hashing because ``hashlib``
     rejects ``str`` input on Python 3; values that are already bytes are
     hashed unchanged, so byte-string inputs keep producing the same digest.
     """
     def as_bytes(value):
         # Pass bytes through untouched; encode text with the default codec.
         return value if isinstance(value, bytes) else value.encode()

     md5 = hashlib.md5()
     md5.update(as_bytes(arvados.config.get('ARVADOS_API_HOST', '!nohost')))
     # Sort the resolved paths so argument order does not change the digest.
     realpaths = sorted(os.path.realpath(path) for path in args.paths)
     md5.update(b'\0'.join(as_bytes(path) for path in realpaths))
     if any(os.path.isdir(path) for path in realpaths):
         # Depths below -1 are clamped to -1 before being hashed.
         md5.update(as_bytes(str(max(args.max_manifest_depth, -1))))
     elif args.filename:
         md5.update(as_bytes(args.filename))
     return os.path.join(
         arv_cmd.make_home_conf_dir(cls.CACHE_DIR, 0o700, 'raise'),
         md5.hexdigest())
Ejemplo n.º 4
0
 def make_path(cls, args):
     """Return the cache file path derived from the API host and inputs.

     The file name is the MD5 hex digest of the API host, the sorted real
     paths of ``args.paths`` and a trailing marker: ``b'-1'`` when any path
     is a directory, otherwise ``args.filename`` when it is set.
     """
     api_host = arvados.config.get('ARVADOS_API_HOST', '!nohost')
     digest = hashlib.md5(api_host.encode())

     resolved = sorted(map(os.path.realpath, args.paths))
     digest.update(b'\0'.join([entry.encode() for entry in resolved]))

     if any(map(os.path.isdir, resolved)):
         trailer = b'-1'
     elif args.filename:
         trailer = args.filename.encode()
     else:
         # Hashing an empty chunk leaves the digest unchanged.
         trailer = b''
     digest.update(trailer)

     cache_dir = arv_cmd.make_home_conf_dir(cls.CACHE_DIR, 0o700, 'raise')
     return os.path.join(cache_dir, digest.hexdigest())
Ejemplo n.º 5
0
 def _get_cache_filepath(self):
     """Return the cache file path derived from the API host and inputs.

     The file name is the MD5 hex digest of the API host, the sorted real
     paths of ``self.paths`` and, when set, ``self.filename``.
     """
     api_host = arvados.config.get('ARVADOS_API_HOST', '!nohost')
     hasher = hashlib.md5(api_host.encode())

     # Sorted so the digest is independent of the order of self.paths.
     resolved = sorted(map(os.path.realpath, self.paths))
     hasher.update(b'\0'.join([entry.encode() for entry in resolved]))
     if self.filename:
         hasher.update(self.filename.encode())

     cache_dir = arv_cmd.make_home_conf_dir(self.CACHE_DIR, 0o700, 'raise')
     return os.path.join(cache_dir, hasher.hexdigest())
Ejemplo n.º 6
0
def prep_image_file(filename):
    """Return ``(image_file, need_save)`` for saving a Docker image.

    ``need_save`` is False only when the recorded stat data for the cached
    file matches its current mtime and size; in that case the cached file is
    opened read-only.  Otherwise the file is opened for writing.  When no
    cache directory is available a named temporary ``.tar`` file is used.
    """
    docker_cache = arv_cmd.make_home_conf_dir(os.path.join(".cache", "arvados", "docker"), 0o700)
    if docker_cache is None:
        # No cache directory: fall back to a temp file that must be saved.
        return tempfile.NamedTemporaryFile(suffix=".tar"), True

    target = os.path.join(docker_cache, filename)
    try:
        with open(stat_cache_name(target)) as stat_fh:
            recorded = json.load(stat_fh)
        current = os.stat(target)
        stale = any(recorded[key] != current[key] for key in [ST_MTIME, ST_SIZE])
    except STAT_CACHE_ERRORS + (AttributeError, IndexError):
        # We couldn't compare against old stats
        stale = True

    mode = "w+b" if stale else "rb"
    return open(target, mode), stale
Ejemplo n.º 7
0
def prep_image_file(filename):
    """Return ``(image_file, need_save)`` for saving a Docker image.

    ``need_save`` tells the caller whether the image still has to be written:
    it is False only when the recorded stat data matches the cached file's
    current mtime and size.  Without a cache directory a named temporary
    ``.tar`` file is returned and ``need_save`` is True.
    """
    cache_root = arv_cmd.make_home_conf_dir(
        os.path.join('.cache', 'arvados', 'docker'), 0o700)
    if cache_root is None:
        return tempfile.NamedTemporaryFile(suffix='.tar'), True

    image_path = os.path.join(cache_root, filename)
    try:
        with open(stat_cache_name(image_path)) as stat_fh:
            recorded = json.load(stat_fh)
        current = os.stat(image_path)
        need_save = (recorded[ST_MTIME] != current[ST_MTIME]
                     or recorded[ST_SIZE] != current[ST_SIZE])
    except STAT_CACHE_ERRORS + (AttributeError, IndexError):
        # We couldn't compare against old stats
        need_save = True

    if need_save:
        image_file = open(image_path, 'w+b')
    else:
        image_file = open(image_path, 'rb')
    return image_file, need_save
Ejemplo n.º 8
0
def get_cache_dir():
    """Return the Docker cache directory from ``arv_cmd.make_home_conf_dir``.

    The relative location requested is ``.cache/arvados/docker`` with mode
    0o700.  NOTE(review): presumably resolved under the user's home
    directory -- confirm against ``arv_cmd``.
    """
    docker_subdir = os.path.join('.cache', 'arvados', 'docker')
    return arv_cmd.make_home_conf_dir(docker_subdir, 0o700)
Ejemplo n.º 9
0
def get_cache_dir():
    """Return the Docker cache directory from ``arv_cmd.make_home_conf_dir``.

    The relative location requested is ``.cache/arvados/docker`` with mode
    0o700.  NOTE(review): presumably resolved under the user's home
    directory -- confirm against ``arv_cmd``.
    """
    docker_subdir = os.path.join('.cache', 'arvados', 'docker')
    return arv_cmd.make_home_conf_dir(docker_subdir, 0o700)
Ejemplo n.º 10
0
 def setup_user_cache(cls):
     """Return what ``arv_cmd.make_home_conf_dir`` gives for ``cls.CACHE_DIR``.

     The directory is requested with mode 0o700.
     """
     cache_dir = arv_cmd.make_home_conf_dir(cls.CACHE_DIR, 0o700)
     return cache_dir