def test_cache_store_get():
    tmp_dir = tempfile.mkdtemp()
    my_cache = cache.Cache(cache_root_dir=tmp_dir)

    path1 = utils.touch(
        os.path.join(my_cache.get_cache_dir(101201), "file1.ext"))
    my_cache.add(file_handle_id=101201, path=path1)

    path2 = utils.touch(
        os.path.join(my_cache.get_cache_dir(101202), "file2.ext"))
    my_cache.add(file_handle_id=101202, path=path2)

    # set path3's mtime to be later than path2's
    new_time_stamp = cache._get_modified_time(path2) + 2

    path3 = utils.touch(os.path.join(tmp_dir, "foo", "file2.ext"),
                        (new_time_stamp, new_time_stamp))
    my_cache.add(file_handle_id=101202, path=path3)

    a_file = my_cache.get(file_handle_id=101201)
    assert utils.equal_paths(a_file, path1)

    a_file = my_cache.get(file_handle_id=101201, path=path1)
    assert utils.equal_paths(a_file, path1)

    a_file = my_cache.get(file_handle_id=101201,
                          path=my_cache.get_cache_dir(101201))
    assert utils.equal_paths(a_file, path1)

    b_file = my_cache.get(file_handle_id=101202, path=os.path.dirname(path2))
    assert utils.equal_paths(b_file, path2)

    b_file = my_cache.get(file_handle_id=101202, path=os.path.dirname(path3))
    assert utils.equal_paths(b_file, path3)

    not_in_cache_file = my_cache.get(file_handle_id=101203, path=tmp_dir)
    assert not_in_cache_file is None

    removed = my_cache.remove(file_handle_id=101201, path=path1, delete=True)
    assert utils.normalize_path(path1) in removed
    assert len(removed) == 1
    assert my_cache.get(file_handle_id=101201) is None

    removed = my_cache.remove(file_handle_id=101202, path=path3, delete=True)
    b_file = my_cache.get(file_handle_id=101202)
    assert utils.normalize_path(path3) in removed
    assert len(removed) == 1
    assert utils.equal_paths(b_file, path2)

    removed = my_cache.remove(file_handle_id=101202, delete=True)
    assert utils.normalize_path(path2) in removed
    assert len(removed) == 1
    assert my_cache.get(file_handle_id=101202) is None
Example #2
0
def test_download_file_entity__correct_local_state(syn):
    mock_cache_path = utils.normalize_path("/i/will/show/you/the/path/yi.txt")
    file_entity = File(parentId="syn123")
    file_entity.dataFileHandleId = 123
    with patch.object(syn.cache, 'get', return_value=mock_cache_path):
        syn._download_file_entity(downloadLocation=None,
                                  entity=file_entity,
                                  ifcollision="overwrite.local",
                                  submission=None)
        assert mock_cache_path == utils.normalize_path(file_entity.path)
        assert os.path.dirname(mock_cache_path) == file_entity.cacheDir
        assert 1 == len(file_entity.files)
        assert os.path.basename(mock_cache_path) == file_entity.files[0]
Example #3
0
def test_normalize_path():
    # tests should pass on reasonable OSes and also on windows

    # resolves relative paths
    assert len(utils.normalize_path('asdf.txt')) > 8

    # doesn't resolve home directory references
    # assert '~' not in utils.normalize_path('~/asdf.txt')

    # converts back slashes to forward slashes
    assert utils.normalize_path('\\windows\\why\\why\\why.txt')

    # what's the right thing to do for None?
    assert utils.normalize_path(None) is None
def test_download_local_file_URL_path(syn, project, schedule_for_cleanup):
    path = utils.make_bogus_data_file()
    schedule_for_cleanup(path)

    filehandle = create_external_file_handle(syn, path, mimetype=None, file_size=None)

    localFileEntity = syn.store(File(dataFileHandleId=filehandle['id'], parent=project))
    e = syn.get(localFileEntity.id)
    assert path == utils.normalize_path(e.path)
def test_store__changing_externalURL_by_changing_path(syn, project, schedule_for_cleanup):
    url = 'https://www.synapse.org/Portal/clear.cache.gif'
    ext = syn.store(File(url, name="test", parent=project, synapseStore=False))

    # perform a syn.get so the filename changes
    ext = syn.get(ext)

    # create a temp file
    temp_path = utils.make_bogus_data_file()
    schedule_for_cleanup(temp_path)

    ext.synapseStore = False
    ext.path = temp_path
    ext = syn.store(ext)

    # do a get to make sure filehandle has been updated correctly
    ext = syn.get(ext.id, downloadFile=True)

    assert ext.externalURL != url
    assert utils.normalize_path(temp_path) == utils.file_url_to_path(ext.externalURL)
    assert temp_path == utils.normalize_path(ext.path)
    assert not ext.synapseStore
Example #6
0
    def add(self, file_handle_id, path):
        """
        Add a file to the cache
        """
        if not path or not os.path.exists(path):
            raise ValueError("Can't find file \"%s\"" % path)

        cache_dir = self.get_cache_dir(file_handle_id)
        with Lock(self.cache_map_file_name, dir=cache_dir):
            cache_map = self._read_cache_map(cache_dir)

            path = utils.normalize_path(path)
            # write .000 milliseconds for backward compatibility
            cache_map[path] = epoch_time_to_iso(
                math.floor(_get_modified_time(path)))
            self._write_cache_map(cache_dir, cache_map)

        return cache_map
Example #7
0
    def contains(self, file_handle_id, path):
        """
        Given a file and file_handle_id, return True if an unmodified cached
        copy of the file exists at the exact path given or False otherwise.
        :param file_handle_id:
        :param path: file path at which to look for a cached copy
        """
        cache_dir = self.get_cache_dir(file_handle_id)
        if not os.path.exists(cache_dir):
            return False

        with Lock(self.cache_map_file_name, dir=cache_dir):
            cache_map = self._read_cache_map(cache_dir)

            path = utils.normalize_path(path)

            cached_time = cache_map.get(path, None)
            if cached_time:
                return compare_timestamps(_get_modified_time(path),
                                          cached_time)
        return False
Example #8
0
    def remove(self, file_handle_id, path=None, delete=None):
        """
        Remove a file from the cache.

        :param file_handle_id: Will also extract file handle id from either a File or file handle
        :param path: If the given path is None, remove (and potentially delete)
                     all cached copies. If the path is that of a file in the
                     .cacheMap file, remove it.

        :returns: A list of files removed
        """
        removed = []
        cache_dir = self.get_cache_dir(file_handle_id)

        # if we've passed an entity and not a path, get path from entity
        if path is None and isinstance(
                file_handle_id,
                collections.abc.Mapping) and 'path' in file_handle_id:
            path = file_handle_id['path']

        with Lock(self.cache_map_file_name, dir=cache_dir):
            cache_map = self._read_cache_map(cache_dir)

            if path is None:
                for path in cache_map:
                    if delete is True and os.path.exists(path):
                        os.remove(path)
                    removed.append(path)
                cache_map = {}
            else:
                path = utils.normalize_path(path)
                if path in cache_map:
                    if delete is True and os.path.exists(path):
                        os.remove(path)
                    del cache_map[path]
                    removed.append(path)

            self._write_cache_map(cache_dir, cache_map)

        return removed
Example #9
0
def _makeManifest(content, schedule_for_cleanup):
    with tempfile.NamedTemporaryFile(mode='w', suffix=".dat", delete=False) as f:
        f.write(content)
        filepath = utils.normalize_path(f.name)
    schedule_for_cleanup(filepath)
    return filepath
Example #10
0
def test_cache_rules():
    # Cache should (in order of preference):
    #
    # 1. DownloadLocation specified:
    #   a. return exact match (unmodified file at the same path)
    #   b. return an unmodified file at another location,
    #      copy to downloadLocation subject to ifcollision
    #   c. download file to downloadLocation subject to ifcollision
    #
    # 2. DownloadLocation *not* specified:
    #   a. return an unmodified file at another location
    #   b. download file to cache_dir overwritting any existing file
    tmp_dir = tempfile.mkdtemp()
    my_cache = cache.Cache(cache_root_dir=tmp_dir)

    # put file in cache dir
    path1 = utils.touch(os.path.join(my_cache.get_cache_dir(101201), "file1.ext"))
    my_cache.add(file_handle_id=101201, path=path1)

    new_time_stamp = cache._get_modified_time(path1) + 1
    path2 = utils.touch(os.path.join(tmp_dir, "not_in_cache", "file1.ext"), (new_time_stamp, new_time_stamp))
    my_cache.add(file_handle_id=101201, path=path2)

    new_time_stamp = cache._get_modified_time(path2) + 1
    path3 = utils.touch(os.path.join(tmp_dir, "also_not_in_cache", "file1.ext"), (new_time_stamp, new_time_stamp))
    my_cache.add(file_handle_id=101201, path=path3)

    # DownloadLocation specified, found exact match
    assert utils.equal_paths(my_cache.get(file_handle_id=101201, path=path2), path2)

    # DownloadLocation specified, no match, get most recent
    path = my_cache.get(file_handle_id=101201, path=os.path.join(tmp_dir, "file_is_not_here", "file1.ext"))
    assert utils.equal_paths(path, path3)

    # DownloadLocation specified as a directory, not in cache, get most recent
    empty_dir = os.path.join(tmp_dir, "empty_directory")
    os.makedirs(empty_dir)
    path = my_cache.get(file_handle_id=101201, path=empty_dir)
    assert utils.equal_paths(path, path3)

    # path2 is now modified
    new_time_stamp = cache._get_modified_time(path2) + 2
    utils.touch(path2, (new_time_stamp, new_time_stamp))

    # test cache.contains
    assert_false(my_cache.contains(file_handle_id=101201, path=empty_dir))
    assert_false(my_cache.contains(file_handle_id=101201, path=path2))
    assert_false(my_cache.contains(file_handle_id=101999, path=path2))
    assert_true(my_cache.contains(file_handle_id=101201, path=path1))
    assert_true(my_cache.contains(file_handle_id=101201, path=path3))

    # Get file from alternate location. Do we care which file we get?
    assert_is_none(my_cache.get(file_handle_id=101201, path=path2))
    assert_in(my_cache.get(file_handle_id=101201), [utils.normalize_path(path1), utils.normalize_path(path3)])

    # Download uncached file to a specified download location
    assert_is_none(my_cache.get(file_handle_id=101202, path=os.path.join(tmp_dir, "not_in_cache")))

    # No downloadLocation specified, get file from alternate location. Do we care which file we get?
    assert_is_not_none(my_cache.get(file_handle_id=101201))
    assert_in(my_cache.get(file_handle_id=101201), [utils.normalize_path(path1), utils.normalize_path(path3)])

    # test case 2b.
    assert_is_none(my_cache.get(file_handle_id=101202))
Example #11
0
    def get(self, file_handle_id, path=None):
        """
        Retrieve a file with the given file handle from the cache.

        :param file_handle_id:
        :param path: If the given path is None, look for a cached copy of the
                     file in the cache directory. If the path is a directory,
                     look there for a cached copy. If a full file-path is
                     given, only check whether that exact file exists and is
                     unmodified since it was cached.

        :returns: Either a file path, if an unmodified cached copy of the file
                  exists in the specified location or None if it does not
        """
        cache_dir = self.get_cache_dir(file_handle_id)
        if not os.path.exists(cache_dir):
            return None

        with Lock(self.cache_map_file_name, dir=cache_dir):
            cache_map = self._read_cache_map(cache_dir)

            path = utils.normalize_path(path)

            # If the caller specifies a path and that path exists in the cache
            # but has been modified, we need to indicate no match by returning
            # None. The logic for updating a synapse entity depends on this to
            # determine the need to upload a new file.

            if path is not None:
                # If we're given a path to a directory, look for a cached file in that directory
                if os.path.isdir(path):
                    matching_unmodified_directory = None
                    removed_entry_from_cache = False  # determines if cache_map needs to be rewritten to disk

                    # iterate a copy of cache_map to allow modifying original cache_map
                    for cached_file_path, cached_time in dict(
                            cache_map).items():
                        if path == os.path.dirname(cached_file_path):
                            # compare_timestamps has an implicit check for whether the path exists
                            if compare_timestamps(
                                    _get_modified_time(cached_file_path),
                                    cached_time):
                                # "break" instead of "return" to write removed invalid entries to disk if necessary
                                matching_unmodified_directory = cached_file_path
                                break
                            else:
                                # remove invalid cache entries pointing to files that that no longer exist
                                # or have been modified
                                del cache_map[cached_file_path]
                                removed_entry_from_cache = True

                    if removed_entry_from_cache:
                        # write cache_map with non-existent entries removed
                        self._write_cache_map(cache_dir, cache_map)

                    if matching_unmodified_directory is not None:
                        return matching_unmodified_directory

                # if we're given a full file path, look up a matching file in the cache
                else:
                    cached_time = cache_map.get(path, None)
                    if cached_time:
                        return path if compare_timestamps(
                            _get_modified_time(path), cached_time) else None

            # return most recently cached and unmodified file OR
            # None if there are no unmodified files
            for cached_file_path, cached_time in sorted(
                    cache_map.items(), key=operator.itemgetter(1),
                    reverse=True):
                if compare_timestamps(_get_modified_time(cached_file_path),
                                      cached_time):
                    return cached_file_path
            return None