def test_cache_store_get():
    """Exercise ``Cache.add``, ``Cache.get`` and ``Cache.remove`` together.

    File handle 101201 gets a single cached file. File handle 101202 gets
    two: one inside its cache directory and one under ``<tmp>/foo`` whose
    mtime is set two seconds later.
    """
    root = tempfile.mkdtemp()
    my_cache = cache.Cache(cache_root_dir=root)

    first = utils.touch(
        os.path.join(my_cache.get_cache_dir(101201), "file1.ext"))
    my_cache.add(file_handle_id=101201, path=first)

    second = utils.touch(
        os.path.join(my_cache.get_cache_dir(101202), "file2.ext"))
    my_cache.add(file_handle_id=101202, path=second)

    # give the third file an mtime later than the second file's
    later = cache._get_modified_time(second) + 2
    third = utils.touch(os.path.join(root, "foo", "file2.ext"), (later, later))
    my_cache.add(file_handle_id=101202, path=third)

    # 101201: lookup without a path, by exact file path, and by directory
    found = my_cache.get(file_handle_id=101201)
    assert utils.equal_paths(found, first)

    found = my_cache.get(file_handle_id=101201, path=first)
    assert utils.equal_paths(found, first)

    found = my_cache.get(file_handle_id=101201,
                         path=my_cache.get_cache_dir(101201))
    assert utils.equal_paths(found, first)

    # 101202: each directory resolves to the file cached inside it
    found = my_cache.get(file_handle_id=101202, path=os.path.dirname(second))
    assert utils.equal_paths(found, second)

    found = my_cache.get(file_handle_id=101202, path=os.path.dirname(third))
    assert utils.equal_paths(found, third)

    # a file handle that was never added yields nothing
    missing = my_cache.get(file_handle_id=101203, path=root)
    assert missing is None

    # removing the only copy for 101201 leaves nothing to get
    gone = my_cache.remove(file_handle_id=101201, path=first, delete=True)
    assert utils.normalize_path(first) in gone
    assert len(gone) == 1
    assert my_cache.get(file_handle_id=101201) is None

    # removing one of two copies for 101202 leaves the other one
    gone = my_cache.remove(file_handle_id=101202, path=third, delete=True)
    found = my_cache.get(file_handle_id=101202)
    assert utils.normalize_path(third) in gone
    assert len(gone) == 1
    assert utils.equal_paths(found, second)

    # removing without a path clears whatever is left for 101202
    gone = my_cache.remove(file_handle_id=101202, delete=True)
    assert utils.normalize_path(second) in gone
    assert len(gone) == 1
    assert my_cache.get(file_handle_id=101202) is None
def test_download_file_entity__correct_local_state(syn):
    """Check the local-state fields set by ``_download_file_entity``.

    ``syn.cache.get`` is patched to return a fixed path; afterwards the
    entity's ``path``, ``cacheDir`` and ``files`` must all reflect that path.
    """
    mock_cache_path = utils.normalize_path("/i/will/show/you/the/path/yi.txt")
    expected_dir, expected_name = os.path.split(mock_cache_path)

    file_entity = File(parentId="syn123")
    file_entity.dataFileHandleId = 123

    with patch.object(syn.cache, 'get', return_value=mock_cache_path):
        syn._download_file_entity(
            downloadLocation=None,
            entity=file_entity,
            ifcollision="overwrite.local",
            submission=None,
        )

        assert utils.normalize_path(file_entity.path) == mock_cache_path
        assert file_entity.cacheDir == expected_dir
        assert len(file_entity.files) == 1
        assert file_entity.files[0] == expected_name
def test_normalize_path():
    """Smoke-test ``utils.normalize_path`` on a few representative inputs."""
    # tests should pass on reasonable OSes and also on windows

    # resolves relative paths
    resolved = utils.normalize_path('asdf.txt')
    assert len(resolved) > 8

    # doesn't resolve home directory references
    # assert '~' not in utils.normalize_path('~/asdf.txt')

    # converts back slashes to forward slashes
    converted = utils.normalize_path('\\windows\\why\\why\\why.txt')
    assert converted

    # what's the right thing to do for None?
    nothing = utils.normalize_path(None)
    assert nothing is None
def test_download_local_file_URL_path(syn, project, schedule_for_cleanup):
    """Store a File backed by an external file handle for a local file, then
    fetch it again and check that its path matches the original local file.
    """
    local_path = utils.make_bogus_data_file()
    schedule_for_cleanup(local_path)

    handle = create_external_file_handle(
        syn, local_path, mimetype=None, file_size=None)
    stored = syn.store(File(dataFileHandleId=handle['id'], parent=project))

    fetched = syn.get(stored.id)
    assert local_path == utils.normalize_path(fetched.path)
def test_store__changing_externalURL_by_changing_path(syn, project, schedule_for_cleanup):
    """Re-storing an external-URL File with a new local ``path`` (and
    ``synapseStore=False``) must replace its ``externalURL`` with one that
    maps back to that local file.
    """
    original_url = 'https://www.synapse.org/Portal/clear.cache.gif'
    entity = syn.store(
        File(original_url, name="test", parent=project, synapseStore=False))

    # perform a syn.get so the filename changes
    entity = syn.get(entity)

    # create a temp file
    local_file = utils.make_bogus_data_file()
    schedule_for_cleanup(local_file)

    entity.synapseStore = False
    entity.path = local_file
    entity = syn.store(entity)

    # do a get to make sure filehandle has been updated correctly
    entity = syn.get(entity.id, downloadFile=True)

    assert entity.externalURL != original_url
    assert utils.normalize_path(local_file) == utils.file_url_to_path(entity.externalURL)
    assert local_file == utils.normalize_path(entity.path)
    assert not entity.synapseStore
def add(self, file_handle_id, path):
    """
    Add a file to the cache.

    The file's normalized path is recorded in the cache map of the file
    handle's cache directory, together with its modification time.

    :param file_handle_id: identifies the cache directory to record into
    :param path:           path of an existing file

    :returns: the cache map after the entry has been written

    :raises ValueError: if ``path`` is empty or does not exist
    """
    if not (path and os.path.exists(path)):
        raise ValueError("Can't find file \"%s\"" % path)

    cache_dir = self.get_cache_dir(file_handle_id)
    with Lock(self.cache_map_file_name, dir=cache_dir):
        entries = self._read_cache_map(cache_dir)
        normalized = utils.normalize_path(path)

        # write .000 milliseconds for backward compatibility
        whole_seconds = math.floor(_get_modified_time(normalized))
        entries[normalized] = epoch_time_to_iso(whole_seconds)

        self._write_cache_map(cache_dir, entries)

    return entries
def contains(self, file_handle_id, path):
    """
    Report whether the cache holds an unmodified copy of a file at exactly
    the given path.

    :param file_handle_id: identifies the cache directory to consult
    :param path:           file path at which to look for a cached copy

    :returns: False if the cache directory does not exist or the cache map
              has no entry for the normalized path; otherwise the result of
              comparing the file's current modification time with the
              recorded one
    """
    cache_dir = self.get_cache_dir(file_handle_id)
    if not os.path.exists(cache_dir):
        return False

    with Lock(self.cache_map_file_name, dir=cache_dir):
        entries = self._read_cache_map(cache_dir)
        normalized = utils.normalize_path(path)

        recorded_time = entries.get(normalized)
        if not recorded_time:
            return False
        return compare_timestamps(_get_modified_time(normalized), recorded_time)
def remove(self, file_handle_id, path=None, delete=None):
    """
    Remove a file from the cache.

    :param file_handle_id: identifies the cache directory. If it is a
                           mapping with a 'path' key and no explicit
                           ``path`` was given, that value is used as the
                           path.
    :param path:           If None, every entry in the cache map is removed.
                           Otherwise only the entry for this (normalized)
                           path is removed, if the cache map has one.
    :param delete:         Only when exactly ``True`` are the removed files
                           also deleted from disk (if they still exist).

    :returns: A list of the paths removed from the cache map
    """
    removed = []
    cache_dir = self.get_cache_dir(file_handle_id)

    # if we've passed an entity and not a path, get path from entity
    entity_like = isinstance(file_handle_id, collections.abc.Mapping)
    if path is None and entity_like and 'path' in file_handle_id:
        path = file_handle_id['path']

    with Lock(self.cache_map_file_name, dir=cache_dir):
        cache_map = self._read_cache_map(cache_dir)

        # work out which entries leave the map
        if path is None:
            targets = list(cache_map)
            cache_map = {}
        else:
            normalized = utils.normalize_path(path)
            if normalized in cache_map:
                targets = [normalized]
                del cache_map[normalized]
            else:
                targets = []

        for target in targets:
            if delete is True and os.path.exists(target):
                os.remove(target)
            removed.append(target)

        self._write_cache_map(cache_dir, cache_map)

    return removed
def _makeManifest(content, schedule_for_cleanup):
    """Write ``content`` to a new ``.dat`` temporary file that is kept after
    closing, register its normalized path with ``schedule_for_cleanup`` and
    return that path.
    """
    manifest = tempfile.NamedTemporaryFile(mode='w', suffix=".dat", delete=False)
    with manifest:
        manifest.write(content)
        manifest_path = utils.normalize_path(manifest.name)
        schedule_for_cleanup(manifest_path)
    return manifest_path
def test_cache_rules():
    """Check the lookup rules of ``Cache.get`` and ``Cache.contains``.

    Three copies of the same file are registered under file handle 101201,
    each with a later mtime than the previous one. One copy is then modified
    on disk so that it no longer matches its cache map entry.
    """
    # Cache should (in order of preference):
    #
    # 1. DownloadLocation specified:
    #   a. return exact match (unmodified file at the same path)
    #   b. return an unmodified file at another location,
    #      copy to downloadLocation subject to ifcollision
    #   c. download file to downloadLocation subject to ifcollision
    #
    # 2. DownloadLocation *not* specified:
    #   a. return an unmodified file at another location
    #   b. download file to cache_dir overwriting any existing file

    tmp_dir = tempfile.mkdtemp()
    my_cache = cache.Cache(cache_root_dir=tmp_dir)

    # put file in cache dir
    path1 = utils.touch(os.path.join(my_cache.get_cache_dir(101201), "file1.ext"))
    my_cache.add(file_handle_id=101201, path=path1)

    new_time_stamp = cache._get_modified_time(path1) + 1
    path2 = utils.touch(os.path.join(tmp_dir, "not_in_cache", "file1.ext"),
                        (new_time_stamp, new_time_stamp))
    my_cache.add(file_handle_id=101201, path=path2)

    new_time_stamp = cache._get_modified_time(path2) + 1
    path3 = utils.touch(os.path.join(tmp_dir, "also_not_in_cache", "file1.ext"),
                        (new_time_stamp, new_time_stamp))
    my_cache.add(file_handle_id=101201, path=path3)

    # DownloadLocation specified, found exact match
    assert utils.equal_paths(my_cache.get(file_handle_id=101201, path=path2), path2)

    # DownloadLocation specified, no match, get most recent
    path = my_cache.get(file_handle_id=101201,
                        path=os.path.join(tmp_dir, "file_is_not_here", "file1.ext"))
    assert utils.equal_paths(path, path3)

    # DownloadLocation specified as a directory, not in cache, get most recent
    empty_dir = os.path.join(tmp_dir, "empty_directory")
    os.makedirs(empty_dir)
    path = my_cache.get(file_handle_id=101201, path=empty_dir)
    assert utils.equal_paths(path, path3)

    # path2 is now modified
    new_time_stamp = cache._get_modified_time(path2) + 2
    utils.touch(path2, (new_time_stamp, new_time_stamp))

    # test cache.contains
    # (plain asserts throughout, matching the rest of the test suite)
    assert not my_cache.contains(file_handle_id=101201, path=empty_dir)
    assert not my_cache.contains(file_handle_id=101201, path=path2)
    assert not my_cache.contains(file_handle_id=101999, path=path2)
    assert my_cache.contains(file_handle_id=101201, path=path1)
    assert my_cache.contains(file_handle_id=101201, path=path3)

    # Get file from alternate location. Do we care which file we get?
    unmodified_paths = [utils.normalize_path(path1), utils.normalize_path(path3)]
    assert my_cache.get(file_handle_id=101201, path=path2) is None
    assert my_cache.get(file_handle_id=101201) in unmodified_paths

    # Download uncached file to a specified download location
    assert my_cache.get(file_handle_id=101202,
                        path=os.path.join(tmp_dir, "not_in_cache")) is None

    # No downloadLocation specified, get file from alternate location. Do we care which file we get?
    assert my_cache.get(file_handle_id=101201) is not None
    assert my_cache.get(file_handle_id=101201) in unmodified_paths

    # test case 2b.
    assert my_cache.get(file_handle_id=101202) is None
def get(self, file_handle_id, path=None):
    """
    Retrieve a file with the given file handle from the cache.

    :param file_handle_id: identifies the cache directory to consult
    :param path:           If None, the most recently cached unmodified
                           copy is returned. If a directory, an unmodified
                           cached copy located directly in that directory is
                           preferred. If a file path that has an entry in
                           the cache map, only that exact file is checked
                           for being unmodified since it was cached.

    :returns: The path of an unmodified cached copy, or None if there is
              none (or if the cache directory does not exist)
    """
    cache_dir = self.get_cache_dir(file_handle_id)
    if not os.path.exists(cache_dir):
        return None

    with Lock(self.cache_map_file_name, dir=cache_dir):
        cache_map = self._read_cache_map(cache_dir)
        target = utils.normalize_path(path)

        # If the caller specifies a path and that path exists in the cache
        # but has been modified, we need to indicate no match by returning
        # None. The logic for updating a synapse entity depends on this to
        # determine the need to upload a new file.
        if target is not None:
            if os.path.isdir(target):
                # a directory was given: look for a cached file located in it
                match = None
                stale = []
                for candidate, recorded_time in cache_map.items():
                    if os.path.dirname(candidate) != target:
                        continue
                    # compare_timestamps has an implicit check for whether the path exists
                    if compare_timestamps(_get_modified_time(candidate), recorded_time):
                        # stop here rather than return, so stale entries still get purged below
                        match = candidate
                        break
                    # entry points at a file that no longer exists or has been modified
                    stale.append(candidate)

                if stale:
                    # drop the invalid entries and persist the cleaned-up map
                    for candidate in stale:
                        del cache_map[candidate]
                    self._write_cache_map(cache_dir, cache_map)

                if match is not None:
                    return match
            else:
                # a full file path was given: only an exact cache map entry counts
                recorded_time = cache_map.get(target)
                if recorded_time:
                    if compare_timestamps(_get_modified_time(target), recorded_time):
                        return target
                    return None

        # return most recently cached and unmodified file OR
        # None if there are no unmodified files
        newest_first = sorted(cache_map.items(), key=operator.itemgetter(1), reverse=True)
        for candidate, recorded_time in newest_first:
            if compare_timestamps(_get_modified_time(candidate), recorded_time):
                return candidate
        return None